{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Holiday Package Prediction\n",
    "\n",
    "### 1) Problem statement.\n",
    "\"Trips & Travel.Com\" company wants to enable and establish a viable business model to expand the customer base.\n",
    "One of the ways to expand the customer base is to introduce a new offering of packages. Currently, there are 5 types of packages the company is offering: Basic, Standard, Deluxe, Super Deluxe, and King. Looking at the data of the last year, we observed that 18% of the customers purchased the packages. However, the marketing cost was quite high because customers were contacted at random without looking at the available information.\n",
    "The company is now planning to launch a new product, i.e. the Wellness Tourism Package. Wellness Tourism is defined as travel that allows the traveler to maintain, enhance or kick-start a healthy lifestyle, and support or increase one's sense of well-being.\n",
    "However, this time the company wants to harness the available data of existing and potential customers to make the marketing expenditure more efficient.\n",
    "### 2) Data Collection.\n",
    "The dataset is collected from https://www.kaggle.com/datasets/susant4learning/holiday-package-purchase-prediction\n",
    "The data consists of 20 columns and 4888 rows."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "## importing important libraries\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "import plotly.express as px\n",
    "import warnings\n",
    "\n",
    "warnings.filterwarnings(\"ignore\")\n",
    "\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>CustomerID</th>\n",
       "      <th>ProdTaken</th>\n",
       "      <th>Age</th>\n",
       "      <th>TypeofContact</th>\n",
       "      <th>CityTier</th>\n",
       "      <th>DurationOfPitch</th>\n",
       "      <th>Occupation</th>\n",
       "      <th>Gender</th>\n",
       "      <th>NumberOfPersonVisiting</th>\n",
       "      <th>NumberOfFollowups</th>\n",
       "      <th>ProductPitched</th>\n",
       "      <th>PreferredPropertyStar</th>\n",
       "      <th>MaritalStatus</th>\n",
       "      <th>NumberOfTrips</th>\n",
       "      <th>Passport</th>\n",
       "      <th>PitchSatisfactionScore</th>\n",
       "      <th>OwnCar</th>\n",
       "      <th>NumberOfChildrenVisiting</th>\n",
       "      <th>Designation</th>\n",
       "      <th>MonthlyIncome</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>200000</td>\n",
       "      <td>1</td>\n",
       "      <td>41.0</td>\n",
       "      <td>Self Enquiry</td>\n",
       "      <td>3</td>\n",
       "      <td>6.0</td>\n",
       "      <td>Salaried</td>\n",
       "      <td>Female</td>\n",
       "      <td>3</td>\n",
       "      <td>3.0</td>\n",
       "      <td>Deluxe</td>\n",
       "      <td>3.0</td>\n",
       "      <td>Single</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>Manager</td>\n",
       "      <td>20993.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>200001</td>\n",
       "      <td>0</td>\n",
       "      <td>49.0</td>\n",
       "      <td>Company Invited</td>\n",
       "      <td>1</td>\n",
       "      <td>14.0</td>\n",
       "      <td>Salaried</td>\n",
       "      <td>Male</td>\n",
       "      <td>3</td>\n",
       "      <td>4.0</td>\n",
       "      <td>Deluxe</td>\n",
       "      <td>4.0</td>\n",
       "      <td>Divorced</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>2.0</td>\n",
       "      <td>Manager</td>\n",
       "      <td>20130.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>200002</td>\n",
       "      <td>1</td>\n",
       "      <td>37.0</td>\n",
       "      <td>Self Enquiry</td>\n",
       "      <td>1</td>\n",
       "      <td>8.0</td>\n",
       "      <td>Free Lancer</td>\n",
       "      <td>Male</td>\n",
       "      <td>3</td>\n",
       "      <td>4.0</td>\n",
       "      <td>Basic</td>\n",
       "      <td>3.0</td>\n",
       "      <td>Single</td>\n",
       "      <td>7.0</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>Executive</td>\n",
       "      <td>17090.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>200003</td>\n",
       "      <td>0</td>\n",
       "      <td>33.0</td>\n",
       "      <td>Company Invited</td>\n",
       "      <td>1</td>\n",
       "      <td>9.0</td>\n",
       "      <td>Salaried</td>\n",
       "      <td>Female</td>\n",
       "      <td>2</td>\n",
       "      <td>3.0</td>\n",
       "      <td>Basic</td>\n",
       "      <td>3.0</td>\n",
       "      <td>Divorced</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1</td>\n",
       "      <td>5</td>\n",
       "      <td>1</td>\n",
       "      <td>1.0</td>\n",
       "      <td>Executive</td>\n",
       "      <td>17909.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>200004</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Self Enquiry</td>\n",
       "      <td>1</td>\n",
       "      <td>8.0</td>\n",
       "      <td>Small Business</td>\n",
       "      <td>Male</td>\n",
       "      <td>2</td>\n",
       "      <td>3.0</td>\n",
       "      <td>Basic</td>\n",
       "      <td>4.0</td>\n",
       "      <td>Divorced</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0</td>\n",
       "      <td>5</td>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>Executive</td>\n",
       "      <td>18468.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   CustomerID  ProdTaken   Age    TypeofContact  CityTier  DurationOfPitch  \\\n",
       "0      200000          1  41.0     Self Enquiry         3              6.0   \n",
       "1      200001          0  49.0  Company Invited         1             14.0   \n",
       "2      200002          1  37.0     Self Enquiry         1              8.0   \n",
       "3      200003          0  33.0  Company Invited         1              9.0   \n",
       "4      200004          0   NaN     Self Enquiry         1              8.0   \n",
       "\n",
       "       Occupation  Gender  NumberOfPersonVisiting  NumberOfFollowups  \\\n",
       "0        Salaried  Female                       3                3.0   \n",
       "1        Salaried    Male                       3                4.0   \n",
       "2     Free Lancer    Male                       3                4.0   \n",
       "3        Salaried  Female                       2                3.0   \n",
       "4  Small Business    Male                       2                3.0   \n",
       "\n",
       "  ProductPitched  PreferredPropertyStar MaritalStatus  NumberOfTrips  \\\n",
       "0         Deluxe                    3.0        Single            1.0   \n",
       "1         Deluxe                    4.0      Divorced            2.0   \n",
       "2          Basic                    3.0        Single            7.0   \n",
       "3          Basic                    3.0      Divorced            2.0   \n",
       "4          Basic                    4.0      Divorced            1.0   \n",
       "\n",
       "   Passport  PitchSatisfactionScore  OwnCar  NumberOfChildrenVisiting  \\\n",
       "0         1                       2       1                       0.0   \n",
       "1         0                       3       1                       2.0   \n",
       "2         1                       3       0                       0.0   \n",
       "3         1                       5       1                       1.0   \n",
       "4         0                       5       1                       0.0   \n",
       "\n",
       "  Designation  MonthlyIncome  \n",
       "0     Manager        20993.0  \n",
       "1     Manager        20130.0  \n",
       "2   Executive        17090.0  \n",
       "3   Executive        17909.0  \n",
       "4   Executive        18468.0  "
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pd.read_csv(\"Travel.csv\")\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Data Cleaning\n",
    "### Handling Missing values\n",
    "1. Handling Missing values\n",
    "2. Handling Duplicates\n",
    "3. Check data type\n",
    "4. Understand the dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "CustomerID                    0\n",
       "ProdTaken                     0\n",
       "Age                         226\n",
       "TypeofContact                25\n",
       "CityTier                      0\n",
       "DurationOfPitch             251\n",
       "Occupation                    0\n",
       "Gender                        0\n",
       "NumberOfPersonVisiting        0\n",
       "NumberOfFollowups            45\n",
       "ProductPitched                0\n",
       "PreferredPropertyStar        26\n",
       "MaritalStatus                 0\n",
       "NumberOfTrips               140\n",
       "Passport                      0\n",
       "PitchSatisfactionScore        0\n",
       "OwnCar                        0\n",
       "NumberOfChildrenVisiting     66\n",
       "Designation                   0\n",
       "MonthlyIncome               233\n",
       "dtype: int64"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.isnull().sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Male       2916\n",
       "Female     1817\n",
       "Fe Male     155\n",
       "Name: Gender, dtype: int64"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "### Check all the categories \n",
    "df['Gender'].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Married      2340\n",
       "Divorced      950\n",
       "Single        916\n",
       "Unmarried     682\n",
       "Name: MaritalStatus, dtype: int64"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['MaritalStatus'].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Self Enquiry       3444\n",
       "Company Invited    1419\n",
       "Name: TypeofContact, dtype: int64"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['TypeofContact'].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "df['Gender'] = df['Gender'].replace('Fe Male', 'Female')\n",
    "df['MaritalStatus'] = df['MaritalStatus'].replace('Single', 'Unmarried')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Male      2916\n",
       "Female    1972\n",
       "Name: Gender, dtype: int64"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "### Check all the categories \n",
    "df['Gender'].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>CustomerID</th>\n",
       "      <th>ProdTaken</th>\n",
       "      <th>Age</th>\n",
       "      <th>TypeofContact</th>\n",
       "      <th>CityTier</th>\n",
       "      <th>DurationOfPitch</th>\n",
       "      <th>Occupation</th>\n",
       "      <th>Gender</th>\n",
       "      <th>NumberOfPersonVisiting</th>\n",
       "      <th>NumberOfFollowups</th>\n",
       "      <th>ProductPitched</th>\n",
       "      <th>PreferredPropertyStar</th>\n",
       "      <th>MaritalStatus</th>\n",
       "      <th>NumberOfTrips</th>\n",
       "      <th>Passport</th>\n",
       "      <th>PitchSatisfactionScore</th>\n",
       "      <th>OwnCar</th>\n",
       "      <th>NumberOfChildrenVisiting</th>\n",
       "      <th>Designation</th>\n",
       "      <th>MonthlyIncome</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>200000</td>\n",
       "      <td>1</td>\n",
       "      <td>41.0</td>\n",
       "      <td>Self Enquiry</td>\n",
       "      <td>3</td>\n",
       "      <td>6.0</td>\n",
       "      <td>Salaried</td>\n",
       "      <td>Female</td>\n",
       "      <td>3</td>\n",
       "      <td>3.0</td>\n",
       "      <td>Deluxe</td>\n",
       "      <td>3.0</td>\n",
       "      <td>Unmarried</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>Manager</td>\n",
       "      <td>20993.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>200001</td>\n",
       "      <td>0</td>\n",
       "      <td>49.0</td>\n",
       "      <td>Company Invited</td>\n",
       "      <td>1</td>\n",
       "      <td>14.0</td>\n",
       "      <td>Salaried</td>\n",
       "      <td>Male</td>\n",
       "      <td>3</td>\n",
       "      <td>4.0</td>\n",
       "      <td>Deluxe</td>\n",
       "      <td>4.0</td>\n",
       "      <td>Divorced</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>2.0</td>\n",
       "      <td>Manager</td>\n",
       "      <td>20130.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>200002</td>\n",
       "      <td>1</td>\n",
       "      <td>37.0</td>\n",
       "      <td>Self Enquiry</td>\n",
       "      <td>1</td>\n",
       "      <td>8.0</td>\n",
       "      <td>Free Lancer</td>\n",
       "      <td>Male</td>\n",
       "      <td>3</td>\n",
       "      <td>4.0</td>\n",
       "      <td>Basic</td>\n",
       "      <td>3.0</td>\n",
       "      <td>Unmarried</td>\n",
       "      <td>7.0</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>Executive</td>\n",
       "      <td>17090.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>200003</td>\n",
       "      <td>0</td>\n",
       "      <td>33.0</td>\n",
       "      <td>Company Invited</td>\n",
       "      <td>1</td>\n",
       "      <td>9.0</td>\n",
       "      <td>Salaried</td>\n",
       "      <td>Female</td>\n",
       "      <td>2</td>\n",
       "      <td>3.0</td>\n",
       "      <td>Basic</td>\n",
       "      <td>3.0</td>\n",
       "      <td>Divorced</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1</td>\n",
       "      <td>5</td>\n",
       "      <td>1</td>\n",
       "      <td>1.0</td>\n",
       "      <td>Executive</td>\n",
       "      <td>17909.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>200004</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Self Enquiry</td>\n",
       "      <td>1</td>\n",
       "      <td>8.0</td>\n",
       "      <td>Small Business</td>\n",
       "      <td>Male</td>\n",
       "      <td>2</td>\n",
       "      <td>3.0</td>\n",
       "      <td>Basic</td>\n",
       "      <td>4.0</td>\n",
       "      <td>Divorced</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0</td>\n",
       "      <td>5</td>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>Executive</td>\n",
       "      <td>18468.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   CustomerID  ProdTaken   Age    TypeofContact  CityTier  DurationOfPitch  \\\n",
       "0      200000          1  41.0     Self Enquiry         3              6.0   \n",
       "1      200001          0  49.0  Company Invited         1             14.0   \n",
       "2      200002          1  37.0     Self Enquiry         1              8.0   \n",
       "3      200003          0  33.0  Company Invited         1              9.0   \n",
       "4      200004          0   NaN     Self Enquiry         1              8.0   \n",
       "\n",
       "       Occupation  Gender  NumberOfPersonVisiting  NumberOfFollowups  \\\n",
       "0        Salaried  Female                       3                3.0   \n",
       "1        Salaried    Male                       3                4.0   \n",
       "2     Free Lancer    Male                       3                4.0   \n",
       "3        Salaried  Female                       2                3.0   \n",
       "4  Small Business    Male                       2                3.0   \n",
       "\n",
       "  ProductPitched  PreferredPropertyStar MaritalStatus  NumberOfTrips  \\\n",
       "0         Deluxe                    3.0     Unmarried            1.0   \n",
       "1         Deluxe                    4.0      Divorced            2.0   \n",
       "2          Basic                    3.0     Unmarried            7.0   \n",
       "3          Basic                    3.0      Divorced            2.0   \n",
       "4          Basic                    4.0      Divorced            1.0   \n",
       "\n",
       "   Passport  PitchSatisfactionScore  OwnCar  NumberOfChildrenVisiting  \\\n",
       "0         1                       2       1                       0.0   \n",
       "1         0                       3       1                       2.0   \n",
       "2         1                       3       0                       0.0   \n",
       "3         1                       5       1                       1.0   \n",
       "4         0                       5       1                       0.0   \n",
       "\n",
       "  Designation  MonthlyIncome  \n",
       "0     Manager        20993.0  \n",
       "1     Manager        20130.0  \n",
       "2   Executive        17090.0  \n",
       "3   Executive        17909.0  \n",
       "4   Executive        18468.0  "
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Age 4.62357 % missing values\n",
      "TypeofContact 0.51146 % missing values\n",
      "DurationOfPitch 5.13502 % missing values\n",
      "NumberOfFollowups 0.92062 % missing values\n",
      "PreferredPropertyStar 0.53191 % missing values\n",
      "NumberOfTrips 2.86416 % missing values\n",
      "NumberOfChildrenVisiting 1.35025 % missing values\n",
      "MonthlyIncome 4.76678 % missing values\n"
     ]
    }
   ],
   "source": [
    "## Check Misssing Values\n",
    "##these are the features with nan value\n",
    "features_with_na=[features for features in df.columns if df[features].isnull().sum()>=1]\n",
    "for feature in features_with_na:\n",
    "    print(feature,np.round(df[feature].isnull().mean()*100,5), '% missing values')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Age</th>\n",
       "      <th>DurationOfPitch</th>\n",
       "      <th>NumberOfFollowups</th>\n",
       "      <th>PreferredPropertyStar</th>\n",
       "      <th>NumberOfTrips</th>\n",
       "      <th>NumberOfChildrenVisiting</th>\n",
       "      <th>MonthlyIncome</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>4662.000000</td>\n",
       "      <td>4637.000000</td>\n",
       "      <td>4843.000000</td>\n",
       "      <td>4862.000000</td>\n",
       "      <td>4748.000000</td>\n",
       "      <td>4822.000000</td>\n",
       "      <td>4655.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>37.622265</td>\n",
       "      <td>15.490835</td>\n",
       "      <td>3.708445</td>\n",
       "      <td>3.581037</td>\n",
       "      <td>3.236521</td>\n",
       "      <td>1.187267</td>\n",
       "      <td>23619.853491</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>9.316387</td>\n",
       "      <td>8.519643</td>\n",
       "      <td>1.002509</td>\n",
       "      <td>0.798009</td>\n",
       "      <td>1.849019</td>\n",
       "      <td>0.857861</td>\n",
       "      <td>5380.698361</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>18.000000</td>\n",
       "      <td>5.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>1000.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>31.000000</td>\n",
       "      <td>9.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>20346.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>36.000000</td>\n",
       "      <td>13.000000</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>22347.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>44.000000</td>\n",
       "      <td>20.000000</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>25571.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>61.000000</td>\n",
       "      <td>127.000000</td>\n",
       "      <td>6.000000</td>\n",
       "      <td>5.000000</td>\n",
       "      <td>22.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>98678.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "               Age  DurationOfPitch  NumberOfFollowups  PreferredPropertyStar  \\\n",
       "count  4662.000000      4637.000000        4843.000000            4862.000000   \n",
       "mean     37.622265        15.490835           3.708445               3.581037   \n",
       "std       9.316387         8.519643           1.002509               0.798009   \n",
       "min      18.000000         5.000000           1.000000               3.000000   \n",
       "25%      31.000000         9.000000           3.000000               3.000000   \n",
       "50%      36.000000        13.000000           4.000000               3.000000   \n",
       "75%      44.000000        20.000000           4.000000               4.000000   \n",
       "max      61.000000       127.000000           6.000000               5.000000   \n",
       "\n",
       "       NumberOfTrips  NumberOfChildrenVisiting  MonthlyIncome  \n",
       "count    4748.000000               4822.000000    4655.000000  \n",
       "mean        3.236521                  1.187267   23619.853491  \n",
       "std         1.849019                  0.857861    5380.698361  \n",
       "min         1.000000                  0.000000    1000.000000  \n",
       "25%         2.000000                  1.000000   20346.000000  \n",
       "50%         3.000000                  1.000000   22347.000000  \n",
       "75%         4.000000                  2.000000   25571.000000  \n",
       "max        22.000000                  3.000000   98678.000000  "
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# statistics on numerical columns (Null cols)\n",
    "df[features_with_na].select_dtypes(exclude='object').describe()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Imputing Null values\n",
    "1. Impute Median value for Age column\n",
    "2. Impute Mode for TypeofContact\n",
    "3. Impute Median for DurationOfPitch\n",
    "4. Impute Mode for NumberOfFollowups as it is a discrete feature\n",
    "5. Impute Mode for PreferredPropertyStar\n",
    "6. Impute Median for NumberOfTrips\n",
    "7. Impute Mode for NumberOfChildrenVisiting\n",
    "8. Impute Median for MonthlyIncome"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Age\n",
    "df.Age.fillna(df.Age.median(), inplace=True)\n",
    "\n",
    "#TypeofContract\n",
    "df.TypeofContact.fillna(df.TypeofContact.mode()[0], inplace=True)\n",
    "\n",
    "#DurationOfPitch\n",
    "df.DurationOfPitch.fillna(df.DurationOfPitch.median(), inplace=True)\n",
    "\n",
    "#NumberOfFollowups\n",
    "df.NumberOfFollowups.fillna(df.NumberOfFollowups.mode()[0], inplace=True)\n",
    "\n",
    "#PreferredPropertyStar\n",
    "df.PreferredPropertyStar.fillna(df.PreferredPropertyStar.mode()[0], inplace=True)\n",
    "\n",
    "#NumberOfTrips\n",
    "df.NumberOfTrips.fillna(df.NumberOfTrips.median(), inplace=True)\n",
    "\n",
    "#NumberOfChildrenVisiting\n",
    "df.NumberOfChildrenVisiting.fillna(df.NumberOfChildrenVisiting.mode()[0], inplace=True)\n",
    "\n",
    "#MonthlyIncome\n",
    "df.MonthlyIncome.fillna(df.MonthlyIncome.median(), inplace=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "CustomerID                  0\n",
       "ProdTaken                   0\n",
       "Age                         0\n",
       "TypeofContact               0\n",
       "CityTier                    0\n",
       "DurationOfPitch             0\n",
       "Occupation                  0\n",
       "Gender                      0\n",
       "NumberOfPersonVisiting      0\n",
       "NumberOfFollowups           0\n",
       "ProductPitched              0\n",
       "PreferredPropertyStar       0\n",
       "MaritalStatus               0\n",
       "NumberOfTrips               0\n",
       "Passport                    0\n",
       "PitchSatisfactionScore      0\n",
       "OwnCar                      0\n",
       "NumberOfChildrenVisiting    0\n",
       "Designation                 0\n",
       "MonthlyIncome               0\n",
       "dtype: int64"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.head()\n",
    "df.isnull().sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.drop('CustomerID', inplace=True, axis=1)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Feature Engineering\n",
    "\n",
    "### Feature Extraction"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ProdTaken</th>\n",
       "      <th>Age</th>\n",
       "      <th>TypeofContact</th>\n",
       "      <th>CityTier</th>\n",
       "      <th>DurationOfPitch</th>\n",
       "      <th>Occupation</th>\n",
       "      <th>Gender</th>\n",
       "      <th>NumberOfPersonVisiting</th>\n",
       "      <th>NumberOfFollowups</th>\n",
       "      <th>ProductPitched</th>\n",
       "      <th>PreferredPropertyStar</th>\n",
       "      <th>MaritalStatus</th>\n",
       "      <th>NumberOfTrips</th>\n",
       "      <th>Passport</th>\n",
       "      <th>PitchSatisfactionScore</th>\n",
       "      <th>OwnCar</th>\n",
       "      <th>NumberOfChildrenVisiting</th>\n",
       "      <th>Designation</th>\n",
       "      <th>MonthlyIncome</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>41.0</td>\n",
       "      <td>Self Enquiry</td>\n",
       "      <td>3</td>\n",
       "      <td>6.0</td>\n",
       "      <td>Salaried</td>\n",
       "      <td>Female</td>\n",
       "      <td>3</td>\n",
       "      <td>3.0</td>\n",
       "      <td>Deluxe</td>\n",
       "      <td>3.0</td>\n",
       "      <td>Unmarried</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>Manager</td>\n",
       "      <td>20993.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0</td>\n",
       "      <td>49.0</td>\n",
       "      <td>Company Invited</td>\n",
       "      <td>1</td>\n",
       "      <td>14.0</td>\n",
       "      <td>Salaried</td>\n",
       "      <td>Male</td>\n",
       "      <td>3</td>\n",
       "      <td>4.0</td>\n",
       "      <td>Deluxe</td>\n",
       "      <td>4.0</td>\n",
       "      <td>Divorced</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>2.0</td>\n",
       "      <td>Manager</td>\n",
       "      <td>20130.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>37.0</td>\n",
       "      <td>Self Enquiry</td>\n",
       "      <td>1</td>\n",
       "      <td>8.0</td>\n",
       "      <td>Free Lancer</td>\n",
       "      <td>Male</td>\n",
       "      <td>3</td>\n",
       "      <td>4.0</td>\n",
       "      <td>Basic</td>\n",
       "      <td>3.0</td>\n",
       "      <td>Unmarried</td>\n",
       "      <td>7.0</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>Executive</td>\n",
       "      <td>17090.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0</td>\n",
       "      <td>33.0</td>\n",
       "      <td>Company Invited</td>\n",
       "      <td>1</td>\n",
       "      <td>9.0</td>\n",
       "      <td>Salaried</td>\n",
       "      <td>Female</td>\n",
       "      <td>2</td>\n",
       "      <td>3.0</td>\n",
       "      <td>Basic</td>\n",
       "      <td>3.0</td>\n",
       "      <td>Divorced</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1</td>\n",
       "      <td>5</td>\n",
       "      <td>1</td>\n",
       "      <td>1.0</td>\n",
       "      <td>Executive</td>\n",
       "      <td>17909.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0</td>\n",
       "      <td>36.0</td>\n",
       "      <td>Self Enquiry</td>\n",
       "      <td>1</td>\n",
       "      <td>8.0</td>\n",
       "      <td>Small Business</td>\n",
       "      <td>Male</td>\n",
       "      <td>2</td>\n",
       "      <td>3.0</td>\n",
       "      <td>Basic</td>\n",
       "      <td>4.0</td>\n",
       "      <td>Divorced</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0</td>\n",
       "      <td>5</td>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>Executive</td>\n",
       "      <td>18468.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   ProdTaken   Age    TypeofContact  CityTier  DurationOfPitch  \\\n",
       "0          1  41.0     Self Enquiry         3              6.0   \n",
       "1          0  49.0  Company Invited         1             14.0   \n",
       "2          1  37.0     Self Enquiry         1              8.0   \n",
       "3          0  33.0  Company Invited         1              9.0   \n",
       "4          0  36.0     Self Enquiry         1              8.0   \n",
       "\n",
       "       Occupation  Gender  NumberOfPersonVisiting  NumberOfFollowups  \\\n",
       "0        Salaried  Female                       3                3.0   \n",
       "1        Salaried    Male                       3                4.0   \n",
       "2     Free Lancer    Male                       3                4.0   \n",
       "3        Salaried  Female                       2                3.0   \n",
       "4  Small Business    Male                       2                3.0   \n",
       "\n",
       "  ProductPitched  PreferredPropertyStar MaritalStatus  NumberOfTrips  \\\n",
       "0         Deluxe                    3.0     Unmarried            1.0   \n",
       "1         Deluxe                    4.0      Divorced            2.0   \n",
       "2          Basic                    3.0     Unmarried            7.0   \n",
       "3          Basic                    3.0      Divorced            2.0   \n",
       "4          Basic                    4.0      Divorced            1.0   \n",
       "\n",
       "   Passport  PitchSatisfactionScore  OwnCar  NumberOfChildrenVisiting  \\\n",
       "0         1                       2       1                       0.0   \n",
       "1         0                       3       1                       2.0   \n",
       "2         1                       3       0                       0.0   \n",
       "3         1                       5       1                       1.0   \n",
       "4         0                       5       1                       0.0   \n",
       "\n",
       "  Designation  MonthlyIncome  \n",
       "0     Manager        20993.0  \n",
       "1     Manager        20130.0  \n",
       "2   Executive        17090.0  \n",
       "3   Executive        17909.0  \n",
       "4   Executive        18468.0  "
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Combine the two visitor-count columns into a single TotalVisiting feature.\n",
    "# The guard makes the cell idempotent: once the source columns have been\n",
    "# dropped, re-running it does nothing instead of raising a KeyError.\n",
    "visiting_cols = ['NumberOfPersonVisiting', 'NumberOfChildrenVisiting']\n",
    "if set(visiting_cols).issubset(df.columns):\n",
    "    df['TotalVisiting'] = df[visiting_cols[0]] + df[visiting_cols[1]]\n",
    "    df.drop(columns=visiting_cols, inplace=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Num of Numerical Features : 12\n"
     ]
    }
   ],
   "source": [
    "## numeric features: every column whose dtype is not object\n",
    "num_features = [name for name, dtype in df.dtypes.items() if dtype != 'O']\n",
    "print('Num of Numerical Features :', len(num_features))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Num of Categorical Features : 6\n"
     ]
    }
   ],
   "source": [
    "## categorical features: every column stored with object dtype\n",
    "cat_features = [name for name, dtype in df.dtypes.items() if dtype == 'O']\n",
    "print('Num of Categorical Features :', len(cat_features))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Num of Discrete Features : 9\n"
     ]
    }
   ],
   "source": [
    "## discrete features: numeric columns with at most 25 distinct values\n",
    "## (NaN counts as a value, matching len(unique()))\n",
    "discrete_features = [\n",
    "    col for col in num_features\n",
    "    if df[col].nunique(dropna=False) <= 25\n",
    "]\n",
    "print('Num of Discrete Features :',len(discrete_features))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Num of Continuous Features : 3\n"
     ]
    }
   ],
   "source": [
    "## continuous features: the numeric columns that were not classed as discrete\n",
    "discrete_lookup = set(discrete_features)\n",
    "continuous_features = [col for col in num_features if col not in discrete_lookup]\n",
    "print('Num of Continuous Features :',len(continuous_features))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ProdTaken</th>\n",
       "      <th>Age</th>\n",
       "      <th>TypeofContact</th>\n",
       "      <th>CityTier</th>\n",
       "      <th>DurationOfPitch</th>\n",
       "      <th>Occupation</th>\n",
       "      <th>Gender</th>\n",
       "      <th>NumberOfFollowups</th>\n",
       "      <th>ProductPitched</th>\n",
       "      <th>PreferredPropertyStar</th>\n",
       "      <th>MaritalStatus</th>\n",
       "      <th>NumberOfTrips</th>\n",
       "      <th>Passport</th>\n",
       "      <th>PitchSatisfactionScore</th>\n",
       "      <th>OwnCar</th>\n",
       "      <th>Designation</th>\n",
       "      <th>MonthlyIncome</th>\n",
       "      <th>TotalVisiting</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>41.0</td>\n",
       "      <td>Self Enquiry</td>\n",
       "      <td>3</td>\n",
       "      <td>6.0</td>\n",
       "      <td>Salaried</td>\n",
       "      <td>Female</td>\n",
       "      <td>3.0</td>\n",
       "      <td>Deluxe</td>\n",
       "      <td>3.0</td>\n",
       "      <td>Unmarried</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>Manager</td>\n",
       "      <td>20993.0</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0</td>\n",
       "      <td>49.0</td>\n",
       "      <td>Company Invited</td>\n",
       "      <td>1</td>\n",
       "      <td>14.0</td>\n",
       "      <td>Salaried</td>\n",
       "      <td>Male</td>\n",
       "      <td>4.0</td>\n",
       "      <td>Deluxe</td>\n",
       "      <td>4.0</td>\n",
       "      <td>Divorced</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>Manager</td>\n",
       "      <td>20130.0</td>\n",
       "      <td>5.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>37.0</td>\n",
       "      <td>Self Enquiry</td>\n",
       "      <td>1</td>\n",
       "      <td>8.0</td>\n",
       "      <td>Free Lancer</td>\n",
       "      <td>Male</td>\n",
       "      <td>4.0</td>\n",
       "      <td>Basic</td>\n",
       "      <td>3.0</td>\n",
       "      <td>Unmarried</td>\n",
       "      <td>7.0</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>Executive</td>\n",
       "      <td>17090.0</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0</td>\n",
       "      <td>33.0</td>\n",
       "      <td>Company Invited</td>\n",
       "      <td>1</td>\n",
       "      <td>9.0</td>\n",
       "      <td>Salaried</td>\n",
       "      <td>Female</td>\n",
       "      <td>3.0</td>\n",
       "      <td>Basic</td>\n",
       "      <td>3.0</td>\n",
       "      <td>Divorced</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1</td>\n",
       "      <td>5</td>\n",
       "      <td>1</td>\n",
       "      <td>Executive</td>\n",
       "      <td>17909.0</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0</td>\n",
       "      <td>36.0</td>\n",
       "      <td>Self Enquiry</td>\n",
       "      <td>1</td>\n",
       "      <td>8.0</td>\n",
       "      <td>Small Business</td>\n",
       "      <td>Male</td>\n",
       "      <td>3.0</td>\n",
       "      <td>Basic</td>\n",
       "      <td>4.0</td>\n",
       "      <td>Divorced</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0</td>\n",
       "      <td>5</td>\n",
       "      <td>1</td>\n",
       "      <td>Executive</td>\n",
       "      <td>18468.0</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   ProdTaken   Age    TypeofContact  CityTier  DurationOfPitch  \\\n",
       "0          1  41.0     Self Enquiry         3              6.0   \n",
       "1          0  49.0  Company Invited         1             14.0   \n",
       "2          1  37.0     Self Enquiry         1              8.0   \n",
       "3          0  33.0  Company Invited         1              9.0   \n",
       "4          0  36.0     Self Enquiry         1              8.0   \n",
       "\n",
       "       Occupation  Gender  NumberOfFollowups ProductPitched  \\\n",
       "0        Salaried  Female                3.0         Deluxe   \n",
       "1        Salaried    Male                4.0         Deluxe   \n",
       "2     Free Lancer    Male                4.0          Basic   \n",
       "3        Salaried  Female                3.0          Basic   \n",
       "4  Small Business    Male                3.0          Basic   \n",
       "\n",
       "   PreferredPropertyStar MaritalStatus  NumberOfTrips  Passport  \\\n",
       "0                    3.0     Unmarried            1.0         1   \n",
       "1                    4.0      Divorced            2.0         0   \n",
       "2                    3.0     Unmarried            7.0         1   \n",
       "3                    3.0      Divorced            2.0         1   \n",
       "4                    4.0      Divorced            1.0         0   \n",
       "\n",
       "   PitchSatisfactionScore  OwnCar Designation  MonthlyIncome  TotalVisiting  \n",
       "0                       2       1     Manager        20993.0            3.0  \n",
       "1                       3       1     Manager        20130.0            5.0  \n",
       "2                       3       0   Executive        17090.0            3.0  \n",
       "3                       5       1   Executive        17909.0            3.0  \n",
       "4                       5       1   Executive        18468.0            2.0  "
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Train Test Split And Model Training"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "# Separate the target column (ProdTaken) from the predictors.\n",
    "y = df['ProdTaken']\n",
    "X = df.drop(columns='ProdTaken')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Age</th>\n",
       "      <th>TypeofContact</th>\n",
       "      <th>CityTier</th>\n",
       "      <th>DurationOfPitch</th>\n",
       "      <th>Occupation</th>\n",
       "      <th>Gender</th>\n",
       "      <th>NumberOfFollowups</th>\n",
       "      <th>ProductPitched</th>\n",
       "      <th>PreferredPropertyStar</th>\n",
       "      <th>MaritalStatus</th>\n",
       "      <th>NumberOfTrips</th>\n",
       "      <th>Passport</th>\n",
       "      <th>PitchSatisfactionScore</th>\n",
       "      <th>OwnCar</th>\n",
       "      <th>Designation</th>\n",
       "      <th>MonthlyIncome</th>\n",
       "      <th>TotalVisiting</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>41.0</td>\n",
       "      <td>Self Enquiry</td>\n",
       "      <td>3</td>\n",
       "      <td>6.0</td>\n",
       "      <td>Salaried</td>\n",
       "      <td>Female</td>\n",
       "      <td>3.0</td>\n",
       "      <td>Deluxe</td>\n",
       "      <td>3.0</td>\n",
       "      <td>Unmarried</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>Manager</td>\n",
       "      <td>20993.0</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>49.0</td>\n",
       "      <td>Company Invited</td>\n",
       "      <td>1</td>\n",
       "      <td>14.0</td>\n",
       "      <td>Salaried</td>\n",
       "      <td>Male</td>\n",
       "      <td>4.0</td>\n",
       "      <td>Deluxe</td>\n",
       "      <td>4.0</td>\n",
       "      <td>Divorced</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>Manager</td>\n",
       "      <td>20130.0</td>\n",
       "      <td>5.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>37.0</td>\n",
       "      <td>Self Enquiry</td>\n",
       "      <td>1</td>\n",
       "      <td>8.0</td>\n",
       "      <td>Free Lancer</td>\n",
       "      <td>Male</td>\n",
       "      <td>4.0</td>\n",
       "      <td>Basic</td>\n",
       "      <td>3.0</td>\n",
       "      <td>Unmarried</td>\n",
       "      <td>7.0</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>Executive</td>\n",
       "      <td>17090.0</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>33.0</td>\n",
       "      <td>Company Invited</td>\n",
       "      <td>1</td>\n",
       "      <td>9.0</td>\n",
       "      <td>Salaried</td>\n",
       "      <td>Female</td>\n",
       "      <td>3.0</td>\n",
       "      <td>Basic</td>\n",
       "      <td>3.0</td>\n",
       "      <td>Divorced</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1</td>\n",
       "      <td>5</td>\n",
       "      <td>1</td>\n",
       "      <td>Executive</td>\n",
       "      <td>17909.0</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>36.0</td>\n",
       "      <td>Self Enquiry</td>\n",
       "      <td>1</td>\n",
       "      <td>8.0</td>\n",
       "      <td>Small Business</td>\n",
       "      <td>Male</td>\n",
       "      <td>3.0</td>\n",
       "      <td>Basic</td>\n",
       "      <td>4.0</td>\n",
       "      <td>Divorced</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0</td>\n",
       "      <td>5</td>\n",
       "      <td>1</td>\n",
       "      <td>Executive</td>\n",
       "      <td>18468.0</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    Age    TypeofContact  CityTier  DurationOfPitch      Occupation  Gender  \\\n",
       "0  41.0     Self Enquiry         3              6.0        Salaried  Female   \n",
       "1  49.0  Company Invited         1             14.0        Salaried    Male   \n",
       "2  37.0     Self Enquiry         1              8.0     Free Lancer    Male   \n",
       "3  33.0  Company Invited         1              9.0        Salaried  Female   \n",
       "4  36.0     Self Enquiry         1              8.0  Small Business    Male   \n",
       "\n",
       "   NumberOfFollowups ProductPitched  PreferredPropertyStar MaritalStatus  \\\n",
       "0                3.0         Deluxe                    3.0     Unmarried   \n",
       "1                4.0         Deluxe                    4.0      Divorced   \n",
       "2                4.0          Basic                    3.0     Unmarried   \n",
       "3                3.0          Basic                    3.0      Divorced   \n",
       "4                3.0          Basic                    4.0      Divorced   \n",
       "\n",
       "   NumberOfTrips  Passport  PitchSatisfactionScore  OwnCar Designation  \\\n",
       "0            1.0         1                       2       1     Manager   \n",
       "1            2.0         0                       3       1     Manager   \n",
       "2            7.0         1                       3       0   Executive   \n",
       "3            2.0         1                       5       1   Executive   \n",
       "4            1.0         0                       5       1   Executive   \n",
       "\n",
       "   MonthlyIncome  TotalVisiting  \n",
       "0        20993.0            3.0  \n",
       "1        20130.0            5.0  \n",
       "2        17090.0            3.0  \n",
       "3        17909.0            3.0  \n",
       "4        18468.0            2.0  "
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    3968\n",
       "1     920\n",
       "Name: ProdTaken, dtype: int64"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y.value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Age</th>\n",
       "      <th>TypeofContact</th>\n",
       "      <th>CityTier</th>\n",
       "      <th>DurationOfPitch</th>\n",
       "      <th>Occupation</th>\n",
       "      <th>Gender</th>\n",
       "      <th>NumberOfFollowups</th>\n",
       "      <th>ProductPitched</th>\n",
       "      <th>PreferredPropertyStar</th>\n",
       "      <th>MaritalStatus</th>\n",
       "      <th>NumberOfTrips</th>\n",
       "      <th>Passport</th>\n",
       "      <th>PitchSatisfactionScore</th>\n",
       "      <th>OwnCar</th>\n",
       "      <th>Designation</th>\n",
       "      <th>MonthlyIncome</th>\n",
       "      <th>TotalVisiting</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>41.0</td>\n",
       "      <td>Self Enquiry</td>\n",
       "      <td>3</td>\n",
       "      <td>6.0</td>\n",
       "      <td>Salaried</td>\n",
       "      <td>Female</td>\n",
       "      <td>3.0</td>\n",
       "      <td>Deluxe</td>\n",
       "      <td>3.0</td>\n",
       "      <td>Unmarried</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>Manager</td>\n",
       "      <td>20993.0</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>49.0</td>\n",
       "      <td>Company Invited</td>\n",
       "      <td>1</td>\n",
       "      <td>14.0</td>\n",
       "      <td>Salaried</td>\n",
       "      <td>Male</td>\n",
       "      <td>4.0</td>\n",
       "      <td>Deluxe</td>\n",
       "      <td>4.0</td>\n",
       "      <td>Divorced</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>Manager</td>\n",
       "      <td>20130.0</td>\n",
       "      <td>5.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>37.0</td>\n",
       "      <td>Self Enquiry</td>\n",
       "      <td>1</td>\n",
       "      <td>8.0</td>\n",
       "      <td>Free Lancer</td>\n",
       "      <td>Male</td>\n",
       "      <td>4.0</td>\n",
       "      <td>Basic</td>\n",
       "      <td>3.0</td>\n",
       "      <td>Unmarried</td>\n",
       "      <td>7.0</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>Executive</td>\n",
       "      <td>17090.0</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>33.0</td>\n",
       "      <td>Company Invited</td>\n",
       "      <td>1</td>\n",
       "      <td>9.0</td>\n",
       "      <td>Salaried</td>\n",
       "      <td>Female</td>\n",
       "      <td>3.0</td>\n",
       "      <td>Basic</td>\n",
       "      <td>3.0</td>\n",
       "      <td>Divorced</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1</td>\n",
       "      <td>5</td>\n",
       "      <td>1</td>\n",
       "      <td>Executive</td>\n",
       "      <td>17909.0</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>36.0</td>\n",
       "      <td>Self Enquiry</td>\n",
       "      <td>1</td>\n",
       "      <td>8.0</td>\n",
       "      <td>Small Business</td>\n",
       "      <td>Male</td>\n",
       "      <td>3.0</td>\n",
       "      <td>Basic</td>\n",
       "      <td>4.0</td>\n",
       "      <td>Divorced</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0</td>\n",
       "      <td>5</td>\n",
       "      <td>1</td>\n",
       "      <td>Executive</td>\n",
       "      <td>18468.0</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    Age    TypeofContact  CityTier  DurationOfPitch      Occupation  Gender  \\\n",
       "0  41.0     Self Enquiry         3              6.0        Salaried  Female   \n",
       "1  49.0  Company Invited         1             14.0        Salaried    Male   \n",
       "2  37.0     Self Enquiry         1              8.0     Free Lancer    Male   \n",
       "3  33.0  Company Invited         1              9.0        Salaried  Female   \n",
       "4  36.0     Self Enquiry         1              8.0  Small Business    Male   \n",
       "\n",
       "   NumberOfFollowups ProductPitched  PreferredPropertyStar MaritalStatus  \\\n",
       "0                3.0         Deluxe                    3.0     Unmarried   \n",
       "1                4.0         Deluxe                    4.0      Divorced   \n",
       "2                4.0          Basic                    3.0     Unmarried   \n",
       "3                3.0          Basic                    3.0      Divorced   \n",
       "4                3.0          Basic                    4.0      Divorced   \n",
       "\n",
       "   NumberOfTrips  Passport  PitchSatisfactionScore  OwnCar Designation  \\\n",
       "0            1.0         1                       2       1     Manager   \n",
       "1            2.0         0                       3       1     Manager   \n",
       "2            7.0         1                       3       0   Executive   \n",
       "3            2.0         1                       5       1   Executive   \n",
       "4            1.0         0                       5       1   Executive   \n",
       "\n",
       "   MonthlyIncome  TotalVisiting  \n",
       "0        20993.0            3.0  \n",
       "1        20130.0            5.0  \n",
       "2        17090.0            3.0  \n",
       "3        17909.0            3.0  \n",
       "4        18468.0            2.0  "
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "((3910, 17), (978, 17))"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.\n",
    "# NOTE(review): y.value_counts() shows an imbalanced target (3968 vs 920);\n",
    "# consider passing stratify=y so both splits keep the same class ratio.\n",
    "X_train, X_test, y_train, y_test = train_test_split(\n",
    "    X, y, test_size=0.2, random_state=42\n",
    ")\n",
    "X_train.shape, X_test.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 4888 entries, 0 to 4887\n",
      "Data columns (total 17 columns):\n",
      " #   Column                  Non-Null Count  Dtype  \n",
      "---  ------                  --------------  -----  \n",
      " 0   Age                     4888 non-null   float64\n",
      " 1   TypeofContact           4888 non-null   object \n",
      " 2   CityTier                4888 non-null   int64  \n",
      " 3   DurationOfPitch         4888 non-null   float64\n",
      " 4   Occupation              4888 non-null   object \n",
      " 5   Gender                  4888 non-null   object \n",
      " 6   NumberOfFollowups       4888 non-null   float64\n",
      " 7   ProductPitched          4888 non-null   object \n",
      " 8   PreferredPropertyStar   4888 non-null   float64\n",
      " 9   MaritalStatus           4888 non-null   object \n",
      " 10  NumberOfTrips           4888 non-null   float64\n",
      " 11  Passport                4888 non-null   int64  \n",
      " 12  PitchSatisfactionScore  4888 non-null   int64  \n",
      " 13  OwnCar                  4888 non-null   int64  \n",
      " 14  Designation             4888 non-null   object \n",
      " 15  MonthlyIncome           4888 non-null   float64\n",
      " 16  TotalVisiting           4888 non-null   float64\n",
      "dtypes: float64(7), int64(4), object(6)\n",
      "memory usage: 649.3+ KB\n"
     ]
    }
   ],
   "source": [
    "X.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build a ColumnTransformer with two branches: one-hot encoding (first level\n",
    "# dropped) for the object columns and standard scaling for all other columns.\n",
    "from sklearn.compose import ColumnTransformer\n",
    "from sklearn.preprocessing import OneHotEncoder, StandardScaler\n",
    "\n",
    "cat_features = X.select_dtypes(include=\"object\").columns\n",
    "num_features = X.select_dtypes(exclude=\"object\").columns\n",
    "\n",
    "oh_transformer = OneHotEncoder(drop='first')\n",
    "numeric_transformer = StandardScaler()\n",
    "\n",
    "preprocessor = ColumnTransformer(\n",
    "    transformers=[\n",
    "        (\"OneHotEncoder\", oh_transformer, cat_features),\n",
    "        (\"StandardScaler\", numeric_transformer, num_features),\n",
    "    ]\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "ColumnTransformer(transformers=[('OneHotEncoder', OneHotEncoder(drop='first'),\n",
       "                                 Index(['TypeofContact', 'Occupation', 'Gender', 'ProductPitched',\n",
       "       'MaritalStatus', 'Designation'],\n",
       "      dtype='object')),\n",
       "                                ('StandardScaler', StandardScaler(),\n",
       "                                 Index(['Age', 'CityTier', 'DurationOfPitch', 'NumberOfFollowups',\n",
       "       'PreferredPropertyStar', 'NumberOfTrips', 'Passport',\n",
       "       'PitchSatisfactionScore', 'OwnCar', 'MonthlyIncome', 'TotalVisiting'],\n",
       "      dtype='object'))])"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "preprocessor"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [],
   "source": [
    "## applying transformation to the training set (fit_transform)\n",
    "X_train=preprocessor.fit_transform(X_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "      <th>4</th>\n",
       "      <th>5</th>\n",
       "      <th>6</th>\n",
       "      <th>7</th>\n",
       "      <th>8</th>\n",
       "      <th>9</th>\n",
       "      <th>...</th>\n",
       "      <th>16</th>\n",
       "      <th>17</th>\n",
       "      <th>18</th>\n",
       "      <th>19</th>\n",
       "      <th>20</th>\n",
       "      <th>21</th>\n",
       "      <th>22</th>\n",
       "      <th>23</th>\n",
       "      <th>24</th>\n",
       "      <th>25</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.721400</td>\n",
       "      <td>-1.020350</td>\n",
       "      <td>1.284279</td>\n",
       "      <td>-0.725271</td>\n",
       "      <td>-0.127737</td>\n",
       "      <td>-0.632399</td>\n",
       "      <td>0.679690</td>\n",
       "      <td>0.782966</td>\n",
       "      <td>-0.382245</td>\n",
       "      <td>-0.774151</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.721400</td>\n",
       "      <td>0.690023</td>\n",
       "      <td>0.282777</td>\n",
       "      <td>-0.725271</td>\n",
       "      <td>1.511598</td>\n",
       "      <td>-0.632399</td>\n",
       "      <td>0.679690</td>\n",
       "      <td>0.782966</td>\n",
       "      <td>-0.459799</td>\n",
       "      <td>0.643615</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.721400</td>\n",
       "      <td>-1.020350</td>\n",
       "      <td>0.282777</td>\n",
       "      <td>1.771041</td>\n",
       "      <td>0.418708</td>\n",
       "      <td>-0.632399</td>\n",
       "      <td>0.679690</td>\n",
       "      <td>0.782966</td>\n",
       "      <td>-0.245196</td>\n",
       "      <td>-0.065268</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.721400</td>\n",
       "      <td>-1.020350</td>\n",
       "      <td>1.284279</td>\n",
       "      <td>-0.725271</td>\n",
       "      <td>-0.127737</td>\n",
       "      <td>-0.632399</td>\n",
       "      <td>1.408395</td>\n",
       "      <td>-1.277194</td>\n",
       "      <td>0.213475</td>\n",
       "      <td>-0.065268</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.721400</td>\n",
       "      <td>2.400396</td>\n",
       "      <td>-1.720227</td>\n",
       "      <td>-0.725271</td>\n",
       "      <td>1.511598</td>\n",
       "      <td>-0.632399</td>\n",
       "      <td>-0.049015</td>\n",
       "      <td>-1.277194</td>\n",
       "      <td>-0.024889</td>\n",
       "      <td>2.061382</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3905</th>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.721400</td>\n",
       "      <td>-0.653841</td>\n",
       "      <td>1.284279</td>\n",
       "      <td>-0.725271</td>\n",
       "      <td>-0.674182</td>\n",
       "      <td>-0.632399</td>\n",
       "      <td>-1.506426</td>\n",
       "      <td>0.782966</td>\n",
       "      <td>-0.536973</td>\n",
       "      <td>0.643615</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3906</th>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>1.455047</td>\n",
       "      <td>-0.898180</td>\n",
       "      <td>-0.718725</td>\n",
       "      <td>1.771041</td>\n",
       "      <td>-1.220627</td>\n",
       "      <td>-0.632399</td>\n",
       "      <td>1.408395</td>\n",
       "      <td>0.782966</td>\n",
       "      <td>1.529609</td>\n",
       "      <td>-0.065268</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3907</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>1.455047</td>\n",
       "      <td>1.545210</td>\n",
       "      <td>0.282777</td>\n",
       "      <td>-0.725271</td>\n",
       "      <td>2.058043</td>\n",
       "      <td>-0.632399</td>\n",
       "      <td>-0.777720</td>\n",
       "      <td>0.782966</td>\n",
       "      <td>-0.360576</td>\n",
       "      <td>0.643615</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3908</th>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>...</td>\n",
       "      <td>1.455047</td>\n",
       "      <td>1.789549</td>\n",
       "      <td>1.284279</td>\n",
       "      <td>-0.725271</td>\n",
       "      <td>-0.127737</td>\n",
       "      <td>-0.632399</td>\n",
       "      <td>-1.506426</td>\n",
       "      <td>0.782966</td>\n",
       "      <td>-0.252799</td>\n",
       "      <td>0.643615</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3909</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.721400</td>\n",
       "      <td>-0.776011</td>\n",
       "      <td>0.282777</td>\n",
       "      <td>-0.725271</td>\n",
       "      <td>-1.220627</td>\n",
       "      <td>1.581280</td>\n",
       "      <td>-0.049015</td>\n",
       "      <td>-1.277194</td>\n",
       "      <td>-1.082511</td>\n",
       "      <td>-1.483035</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>3910 rows × 26 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "       0    1    2    3    4    5    6    7    8    9   ...        16  \\\n",
       "0     1.0  0.0  0.0  1.0  1.0  0.0  0.0  0.0  0.0  0.0  ... -0.721400   \n",
       "1     1.0  0.0  1.0  0.0  1.0  0.0  0.0  0.0  0.0  1.0  ... -0.721400   \n",
       "2     1.0  1.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  ... -0.721400   \n",
       "3     1.0  0.0  1.0  0.0  1.0  1.0  0.0  0.0  0.0  1.0  ... -0.721400   \n",
       "4     0.0  0.0  0.0  1.0  0.0  0.0  0.0  0.0  0.0  0.0  ... -0.721400   \n",
       "...   ...  ...  ...  ...  ...  ...  ...  ...  ...  ...  ...       ...   \n",
       "3905  1.0  0.0  0.0  1.0  1.0  0.0  0.0  0.0  0.0  0.0  ... -0.721400   \n",
       "3906  1.0  0.0  1.0  0.0  0.0  0.0  0.0  0.0  1.0  0.0  ...  1.455047   \n",
       "3907  0.0  0.0  0.0  1.0  0.0  0.0  0.0  0.0  0.0  0.0  ...  1.455047   \n",
       "3908  1.0  0.0  0.0  1.0  0.0  1.0  0.0  0.0  0.0  1.0  ...  1.455047   \n",
       "3909  0.0  0.0  1.0  0.0  1.0  0.0  0.0  0.0  0.0  0.0  ... -0.721400   \n",
       "\n",
       "            17        18        19        20        21        22        23  \\\n",
       "0    -1.020350  1.284279 -0.725271 -0.127737 -0.632399  0.679690  0.782966   \n",
       "1     0.690023  0.282777 -0.725271  1.511598 -0.632399  0.679690  0.782966   \n",
       "2    -1.020350  0.282777  1.771041  0.418708 -0.632399  0.679690  0.782966   \n",
       "3    -1.020350  1.284279 -0.725271 -0.127737 -0.632399  1.408395 -1.277194   \n",
       "4     2.400396 -1.720227 -0.725271  1.511598 -0.632399 -0.049015 -1.277194   \n",
       "...        ...       ...       ...       ...       ...       ...       ...   \n",
       "3905 -0.653841  1.284279 -0.725271 -0.674182 -0.632399 -1.506426  0.782966   \n",
       "3906 -0.898180 -0.718725  1.771041 -1.220627 -0.632399  1.408395  0.782966   \n",
       "3907  1.545210  0.282777 -0.725271  2.058043 -0.632399 -0.777720  0.782966   \n",
       "3908  1.789549  1.284279 -0.725271 -0.127737 -0.632399 -1.506426  0.782966   \n",
       "3909 -0.776011  0.282777 -0.725271 -1.220627  1.581280 -0.049015 -1.277194   \n",
       "\n",
       "            24        25  \n",
       "0    -0.382245 -0.774151  \n",
       "1    -0.459799  0.643615  \n",
       "2    -0.245196 -0.065268  \n",
       "3     0.213475 -0.065268  \n",
       "4    -0.024889  2.061382  \n",
       "...        ...       ...  \n",
       "3905 -0.536973  0.643615  \n",
       "3906  1.529609 -0.065268  \n",
       "3907 -0.360576  0.643615  \n",
       "3908 -0.252799  0.643615  \n",
       "3909 -1.082511 -1.483035  \n",
       "\n",
       "[3910 rows x 26 columns]"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.DataFrame(X_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [],
   "source": [
    "## apply transformation to the test set (transform only)\n",
    "X_test=preprocessor.transform(X_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 0.        ,  0.        ,  0.        , ..., -1.2771941 ,\n",
       "        -0.73751038, -0.77415132],\n",
       "       [ 1.        ,  0.        ,  0.        , ..., -1.2771941 ,\n",
       "        -0.6704111 , -0.06526803],\n",
       "       [ 1.        ,  0.        ,  0.        , ...,  0.78296635,\n",
       "        -0.4208322 , -0.77415132],\n",
       "       ...,\n",
       "       [ 0.        ,  1.        ,  0.        , ...,  0.78296635,\n",
       "         0.69001249,  0.64361526],\n",
       "       [ 1.        ,  0.        ,  0.        , ...,  0.78296635,\n",
       "        -0.22827818, -0.77415132],\n",
       "       [ 1.        ,  1.        ,  0.        , ...,  0.78296635,\n",
       "        -0.44611323,  2.06138184]])"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X_test"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## GradientBoost Classifier Training\n",
    "#### We can also combine multiple algorithms\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "      <th>4</th>\n",
       "      <th>5</th>\n",
       "      <th>6</th>\n",
       "      <th>7</th>\n",
       "      <th>8</th>\n",
       "      <th>9</th>\n",
       "      <th>...</th>\n",
       "      <th>16</th>\n",
       "      <th>17</th>\n",
       "      <th>18</th>\n",
       "      <th>19</th>\n",
       "      <th>20</th>\n",
       "      <th>21</th>\n",
       "      <th>22</th>\n",
       "      <th>23</th>\n",
       "      <th>24</th>\n",
       "      <th>25</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.721400</td>\n",
       "      <td>-1.020350</td>\n",
       "      <td>1.284279</td>\n",
       "      <td>-0.725271</td>\n",
       "      <td>-0.127737</td>\n",
       "      <td>-0.632399</td>\n",
       "      <td>0.679690</td>\n",
       "      <td>0.782966</td>\n",
       "      <td>-0.382245</td>\n",
       "      <td>-0.774151</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.721400</td>\n",
       "      <td>0.690023</td>\n",
       "      <td>0.282777</td>\n",
       "      <td>-0.725271</td>\n",
       "      <td>1.511598</td>\n",
       "      <td>-0.632399</td>\n",
       "      <td>0.679690</td>\n",
       "      <td>0.782966</td>\n",
       "      <td>-0.459799</td>\n",
       "      <td>0.643615</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.721400</td>\n",
       "      <td>-1.020350</td>\n",
       "      <td>0.282777</td>\n",
       "      <td>1.771041</td>\n",
       "      <td>0.418708</td>\n",
       "      <td>-0.632399</td>\n",
       "      <td>0.679690</td>\n",
       "      <td>0.782966</td>\n",
       "      <td>-0.245196</td>\n",
       "      <td>-0.065268</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.721400</td>\n",
       "      <td>-1.020350</td>\n",
       "      <td>1.284279</td>\n",
       "      <td>-0.725271</td>\n",
       "      <td>-0.127737</td>\n",
       "      <td>-0.632399</td>\n",
       "      <td>1.408395</td>\n",
       "      <td>-1.277194</td>\n",
       "      <td>0.213475</td>\n",
       "      <td>-0.065268</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.721400</td>\n",
       "      <td>2.400396</td>\n",
       "      <td>-1.720227</td>\n",
       "      <td>-0.725271</td>\n",
       "      <td>1.511598</td>\n",
       "      <td>-0.632399</td>\n",
       "      <td>-0.049015</td>\n",
       "      <td>-1.277194</td>\n",
       "      <td>-0.024889</td>\n",
       "      <td>2.061382</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3905</th>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.721400</td>\n",
       "      <td>-0.653841</td>\n",
       "      <td>1.284279</td>\n",
       "      <td>-0.725271</td>\n",
       "      <td>-0.674182</td>\n",
       "      <td>-0.632399</td>\n",
       "      <td>-1.506426</td>\n",
       "      <td>0.782966</td>\n",
       "      <td>-0.536973</td>\n",
       "      <td>0.643615</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3906</th>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>1.455047</td>\n",
       "      <td>-0.898180</td>\n",
       "      <td>-0.718725</td>\n",
       "      <td>1.771041</td>\n",
       "      <td>-1.220627</td>\n",
       "      <td>-0.632399</td>\n",
       "      <td>1.408395</td>\n",
       "      <td>0.782966</td>\n",
       "      <td>1.529609</td>\n",
       "      <td>-0.065268</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3907</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>1.455047</td>\n",
       "      <td>1.545210</td>\n",
       "      <td>0.282777</td>\n",
       "      <td>-0.725271</td>\n",
       "      <td>2.058043</td>\n",
       "      <td>-0.632399</td>\n",
       "      <td>-0.777720</td>\n",
       "      <td>0.782966</td>\n",
       "      <td>-0.360576</td>\n",
       "      <td>0.643615</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3908</th>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>...</td>\n",
       "      <td>1.455047</td>\n",
       "      <td>1.789549</td>\n",
       "      <td>1.284279</td>\n",
       "      <td>-0.725271</td>\n",
       "      <td>-0.127737</td>\n",
       "      <td>-0.632399</td>\n",
       "      <td>-1.506426</td>\n",
       "      <td>0.782966</td>\n",
       "      <td>-0.252799</td>\n",
       "      <td>0.643615</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3909</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.721400</td>\n",
       "      <td>-0.776011</td>\n",
       "      <td>0.282777</td>\n",
       "      <td>-0.725271</td>\n",
       "      <td>-1.220627</td>\n",
       "      <td>1.581280</td>\n",
       "      <td>-0.049015</td>\n",
       "      <td>-1.277194</td>\n",
       "      <td>-1.082511</td>\n",
       "      <td>-1.483035</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>3910 rows × 26 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "       0    1    2    3    4    5    6    7    8    9   ...        16  \\\n",
       "0     1.0  0.0  0.0  1.0  1.0  0.0  0.0  0.0  0.0  0.0  ... -0.721400   \n",
       "1     1.0  0.0  1.0  0.0  1.0  0.0  0.0  0.0  0.0  1.0  ... -0.721400   \n",
       "2     1.0  1.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  ... -0.721400   \n",
       "3     1.0  0.0  1.0  0.0  1.0  1.0  0.0  0.0  0.0  1.0  ... -0.721400   \n",
       "4     0.0  0.0  0.0  1.0  0.0  0.0  0.0  0.0  0.0  0.0  ... -0.721400   \n",
       "...   ...  ...  ...  ...  ...  ...  ...  ...  ...  ...  ...       ...   \n",
       "3905  1.0  0.0  0.0  1.0  1.0  0.0  0.0  0.0  0.0  0.0  ... -0.721400   \n",
       "3906  1.0  0.0  1.0  0.0  0.0  0.0  0.0  0.0  1.0  0.0  ...  1.455047   \n",
       "3907  0.0  0.0  0.0  1.0  0.0  0.0  0.0  0.0  0.0  0.0  ...  1.455047   \n",
       "3908  1.0  0.0  0.0  1.0  0.0  1.0  0.0  0.0  0.0  1.0  ...  1.455047   \n",
       "3909  0.0  0.0  1.0  0.0  1.0  0.0  0.0  0.0  0.0  0.0  ... -0.721400   \n",
       "\n",
       "            17        18        19        20        21        22        23  \\\n",
       "0    -1.020350  1.284279 -0.725271 -0.127737 -0.632399  0.679690  0.782966   \n",
       "1     0.690023  0.282777 -0.725271  1.511598 -0.632399  0.679690  0.782966   \n",
       "2    -1.020350  0.282777  1.771041  0.418708 -0.632399  0.679690  0.782966   \n",
       "3    -1.020350  1.284279 -0.725271 -0.127737 -0.632399  1.408395 -1.277194   \n",
       "4     2.400396 -1.720227 -0.725271  1.511598 -0.632399 -0.049015 -1.277194   \n",
       "...        ...       ...       ...       ...       ...       ...       ...   \n",
       "3905 -0.653841  1.284279 -0.725271 -0.674182 -0.632399 -1.506426  0.782966   \n",
       "3906 -0.898180 -0.718725  1.771041 -1.220627 -0.632399  1.408395  0.782966   \n",
       "3907  1.545210  0.282777 -0.725271  2.058043 -0.632399 -0.777720  0.782966   \n",
       "3908  1.789549  1.284279 -0.725271 -0.127737 -0.632399 -1.506426  0.782966   \n",
       "3909 -0.776011  0.282777 -0.725271 -1.220627  1.581280 -0.049015 -1.277194   \n",
       "\n",
       "            24        25  \n",
       "0    -0.382245 -0.774151  \n",
       "1    -0.459799  0.643615  \n",
       "2    -0.245196 -0.065268  \n",
       "3     0.213475 -0.065268  \n",
       "4    -0.024889  2.061382  \n",
       "...        ...       ...  \n",
       "3905 -0.536973  0.643615  \n",
       "3906  1.529609 -0.065268  \n",
       "3907 -0.360576  0.643615  \n",
       "3908 -0.252799  0.643615  \n",
       "3909 -1.082511 -1.483035  \n",
       "\n",
       "[3910 rows x 26 columns]"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.DataFrame(X_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "3995    0\n",
       "2610    0\n",
       "3083    0\n",
       "3973    0\n",
       "4044    0\n",
       "       ..\n",
       "4426    0\n",
       "466     0\n",
       "3092    0\n",
       "3772    0\n",
       "860     1\n",
       "Name: ProdTaken, Length: 3910, dtype: int64"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y_train"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.ensemble import RandomForestClassifier\n",
    "from sklearn.ensemble import GradientBoostingClassifier\n",
    "from sklearn.tree import DecisionTreeClassifier\n",
    "from sklearn.ensemble import AdaBoostClassifier\n",
    "from sklearn.linear_model import LogisticRegression\n",
    "from sklearn.metrics import accuracy_score, classification_report,ConfusionMatrixDisplay, \\\n",
    "                            precision_score, recall_score, f1_score, roc_auc_score,roc_curve "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Logisitic Regression\n",
      "Model performance for Training set\n",
      "- Accuracy: 0.8458\n",
      "- F1 score: 0.8200\n",
      "- Precision: 0.6994\n",
      "- Recall: 0.3032\n",
      "- Roc Auc Score: 0.6366\n",
      "----------------------------------\n",
      "Model performance for Test set\n",
      "- Accuracy: 0.8354\n",
      "- F1 score: 0.8078\n",
      "- Precision: 0.6829\n",
      "- Recall: 0.2932\n",
      "- Roc Auc Score: 0.6301\n",
      "===================================\n",
      "\n",
      "\n",
      "Decision Tree\n",
      "Model performance for Training set\n",
      "- Accuracy: 1.0000\n",
      "- F1 score: 1.0000\n",
      "- Precision: 1.0000\n",
      "- Recall: 1.0000\n",
      "- Roc Auc Score: 1.0000\n",
      "----------------------------------\n",
      "Model performance for Test set\n",
      "- Accuracy: 0.9162\n",
      "- F1 score: 0.9151\n",
      "- Precision: 0.8045\n",
      "- Recall: 0.7539\n",
      "- Roc Auc Score: 0.8547\n",
      "===================================\n",
      "\n",
      "\n",
      "Random Forest\n",
      "Model performance for Training set\n",
      "- Accuracy: 1.0000\n",
      "- F1 score: 1.0000\n",
      "- Precision: 1.0000\n",
      "- Recall: 1.0000\n",
      "- Roc Auc Score: 1.0000\n",
      "----------------------------------\n",
      "Model performance for Test set\n",
      "- Accuracy: 0.9264\n",
      "- F1 score: 0.9204\n",
      "- Precision: 0.9685\n",
      "- Recall: 0.6440\n",
      "- Roc Auc Score: 0.8194\n",
      "===================================\n",
      "\n",
      "\n",
      "Gradient Boost\n",
      "Model performance for Training set\n",
      "- Accuracy: 0.8939\n",
      "- F1 score: 0.8819\n",
      "- Precision: 0.8756\n",
      "- Recall: 0.5021\n",
      "- Roc Auc Score: 0.7429\n",
      "----------------------------------\n",
      "Model performance for Test set\n",
      "- Accuracy: 0.8589\n",
      "- F1 score: 0.8398\n",
      "- Precision: 0.7732\n",
      "- Recall: 0.3927\n",
      "- Roc Auc Score: 0.6824\n",
      "===================================\n",
      "\n",
      "\n",
      "Adaboost\n",
      "Model performance for Training set\n",
      "- Accuracy: 0.8565\n",
      "- F1 score: 0.8365\n",
      "- Precision: 0.7308\n",
      "- Recall: 0.3649\n",
      "- Roc Auc Score: 0.6670\n",
      "----------------------------------\n",
      "Model performance for Test set\n",
      "- Accuracy: 0.8354\n",
      "- F1 score: 0.8115\n",
      "- Precision: 0.6630\n",
      "- Recall: 0.3194\n",
      "- Roc Auc Score: 0.6400\n",
      "===================================\n",
      "\n",
      "\n"
     ]
    }
   ],
   "source": [
    "models={\n",
    "    \"Logisitic Regression\":LogisticRegression(),\n",
    "    \"Decision Tree\":DecisionTreeClassifier(),\n",
    "    \"Random Forest\":RandomForestClassifier(),\n",
    "    \"Gradient Boost\":GradientBoostingClassifier(),\n",
    "    \"Adaboost\":AdaBoostClassifier()\n",
    "}\n",
    "for i in range(len(list(models))):\n",
    "    model = list(models.values())[i]\n",
    "    model.fit(X_train, y_train) # Train model\n",
    "\n",
    "    # Make predictions\n",
    "    y_train_pred = model.predict(X_train)\n",
    "    y_test_pred = model.predict(X_test)\n",
    "\n",
    "    # Training set performance\n",
    "    model_train_accuracy = accuracy_score(y_train, y_train_pred) # Calculate Accuracy\n",
    "    model_train_f1 = f1_score(y_train, y_train_pred, average='weighted') # Calculate F1-score\n",
    "    model_train_precision = precision_score(y_train, y_train_pred) # Calculate Precision\n",
    "    model_train_recall = recall_score(y_train, y_train_pred) # Calculate Recall\n",
    "    model_train_rocauc_score = roc_auc_score(y_train, y_train_pred)\n",
    "\n",
    "\n",
    "    # Test set performance\n",
    "    model_test_accuracy = accuracy_score(y_test, y_test_pred) # Calculate Accuracy\n",
    "    model_test_f1 = f1_score(y_test, y_test_pred, average='weighted') # Calculate F1-score\n",
    "    model_test_precision = precision_score(y_test, y_test_pred) # Calculate Precision\n",
    "    model_test_recall = recall_score(y_test, y_test_pred) # Calculate Recall\n",
    "    model_test_rocauc_score = roc_auc_score(y_test, y_test_pred) #Calculate Roc\n",
    "\n",
    "\n",
    "    print(list(models.keys())[i])\n",
    "    \n",
    "    print('Model performance for Training set')\n",
    "    print(\"- Accuracy: {:.4f}\".format(model_train_accuracy))\n",
    "    print('- F1 score: {:.4f}'.format(model_train_f1))\n",
    "    \n",
    "    print('- Precision: {:.4f}'.format(model_train_precision))\n",
    "    print('- Recall: {:.4f}'.format(model_train_recall))\n",
    "    print('- Roc Auc Score: {:.4f}'.format(model_train_rocauc_score))\n",
    "\n",
    "    \n",
    "    \n",
    "    print('----------------------------------')\n",
    "    \n",
    "    print('Model performance for Test set')\n",
    "    print('- Accuracy: {:.4f}'.format(model_test_accuracy))\n",
    "    print('- F1 score: {:.4f}'.format(model_test_f1))\n",
    "    print('- Precision: {:.4f}'.format(model_test_precision))\n",
    "    print('- Recall: {:.4f}'.format(model_test_recall))\n",
    "    print('- Roc Auc Score: {:.4f}'.format(model_test_rocauc_score))\n",
    "\n",
    "    \n",
    "    print('='*35)\n",
    "    print('\\n')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [],
   "source": [
    "## Hyperparameter Training\n",
    "rf_params = {\"max_depth\": [5, 8, 15, None, 10],\n",
    "             \"max_features\": [5, 7, \"auto\", 8],\n",
    "             \"min_samples_split\": [2, 8, 15, 20],\n",
    "             \"n_estimators\": [100, 200, 500, 1000]}\n",
    "gradient_params={\"loss\": ['log_loss','deviance','exponential'],\n",
    "             \"criterion\": ['friedman_mse','squared_error','mse'],\n",
    "             \"min_samples_split\": [2, 8, 15, 20],\n",
    "             \"n_estimators\": [100, 200, 500],\n",
    "              \"max_depth\": [5, 8, 15, None, 10]\n",
    "                }"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'max_depth': [5, 8, 15, None, 10],\n",
       " 'max_features': [5, 7, 'auto', 8],\n",
       " 'min_samples_split': [2, 8, 15, 20],\n",
       " 'n_estimators': [100, 200, 500, 1000]}"
      ]
     },
     "execution_count": 37,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "rf_params"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'loss': ['log_loss', 'deviance', 'exponential'],\n",
       " 'criterion': ['friedman_mse', 'squared_error', 'mse'],\n",
       " 'min_samples_split': [2, 8, 15, 20],\n",
       " 'n_estimators': [100, 200, 500],\n",
       " 'max_depth': [5, 8, 15, None, 10]}"
      ]
     },
     "execution_count": 40,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "gradient_params"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Models list for Hyperparameter tuning\n",
    "randomcv_models = [\n",
    "                   (\"RF\", RandomForestClassifier(), rf_params),\n",
    "    (\"GradientBoost\", GradientBoostingClassifier(), gradient_params)\n",
    "                   \n",
    "                   ]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[('RF',\n",
       "  RandomForestClassifier(),\n",
       "  {'max_depth': [5, 8, 15, None, 10],\n",
       "   'max_features': [5, 7, 'auto', 8],\n",
       "   'min_samples_split': [2, 8, 15, 20],\n",
       "   'n_estimators': [100, 200, 500, 1000]}),\n",
       " ('GradientBoost',\n",
       "  GradientBoostingClassifier(),\n",
       "  {'loss': ['log_loss', 'deviance', 'exponential'],\n",
       "   'criterion': ['friedman_mse', 'squared_error', 'mse'],\n",
       "   'min_samples_split': [2, 8, 15, 20],\n",
       "   'n_estimators': [100, 200, 500],\n",
       "   'max_depth': [5, 8, 15, None, 10]})]"
      ]
     },
     "execution_count": 42,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "randomcv_models"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Fitting 3 folds for each of 100 candidates, totalling 300 fits\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 32 concurrent workers.\n",
      "[Parallel(n_jobs=-1)]: Done  98 tasks      | elapsed:    9.5s\n",
      "[Parallel(n_jobs=-1)]: Done 300 out of 300 | elapsed:   20.4s finished\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Fitting 3 folds for each of 100 candidates, totalling 300 fits\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 32 concurrent workers.\n",
      "[Parallel(n_jobs=-1)]: Done 134 tasks      | elapsed:    9.2s\n",
      "[Parallel(n_jobs=-1)]: Done 237 out of 300 | elapsed:   18.3s remaining:    4.8s\n",
      "[Parallel(n_jobs=-1)]: Done 300 out of 300 | elapsed:   42.9s finished\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "---------------- Best Params for RF -------------------\n",
      "{'n_estimators': 200, 'min_samples_split': 2, 'max_features': 8, 'max_depth': 15}\n",
      "---------------- Best Params for GradientBoost -------------------\n",
      "{'n_estimators': 500, 'min_samples_split': 20, 'max_depth': 15, 'loss': 'exponential', 'criterion': 'mse'}\n"
     ]
    }
   ],
   "source": [
    "from sklearn.model_selection import RandomizedSearchCV\n",
    "\n",
    "model_param = {}\n",
    "for name, model, params in randomcv_models:\n",
    "    random = RandomizedSearchCV(estimator=model,\n",
    "                                   param_distributions=params,\n",
    "                                   n_iter=100,\n",
    "                                   cv=3,\n",
    "                                   verbose=2,\n",
    "                                   n_jobs=-1)\n",
    "    random.fit(X_train, y_train)\n",
    "    model_param[name] = random.best_params_\n",
    "\n",
    "for model_name in model_param:\n",
    "    print(f\"---------------- Best Params for {model_name} -------------------\")\n",
    "    print(model_param[model_name])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Random Forest\n",
      "Model performance for Training set\n",
      "- Accuracy: 1.0000\n",
      "- F1 score: 1.0000\n",
      "- Precision: 1.0000\n",
      "- Recall: 1.0000\n",
      "- Roc Auc Score: 1.0000\n",
      "----------------------------------\n",
      "Model performance for Test set\n",
      "- Accuracy: 0.9366\n",
      "- F1 score: 0.9324\n",
      "- Precision: 0.9708\n",
      "- Recall: 0.6963\n",
      "- Roc Auc Score: 0.8456\n",
      "===================================\n",
      "\n",
      "\n",
      "GradientBoostclassifier\n",
      "Model performance for Training set\n",
      "- Accuracy: 1.0000\n",
      "- F1 score: 1.0000\n",
      "- Precision: 1.0000\n",
      "- Recall: 1.0000\n",
      "- Roc Auc Score: 1.0000\n",
      "----------------------------------\n",
      "Model performance for Test set\n",
      "- Accuracy: 0.9581\n",
      "- F1 score: 0.9566\n",
      "- Precision: 0.9688\n",
      "- Recall: 0.8115\n",
      "- Roc Auc Score: 0.9026\n",
      "===================================\n",
      "\n",
      "\n"
     ]
    }
   ],
   "source": [
    "models={\n",
    "    \n",
    "    \"Random Forest\":RandomForestClassifier(n_estimators=1000,min_samples_split=2,\n",
    "                                          max_features=7,max_depth=None),\n",
    "    \"GradientBoostclassifier\":GradientBoostingClassifier(n_estimators=500,\n",
    "                                                        min_samples_split=20,\n",
    "                                                        max_depth=15,\n",
    "                                                        loss='exponential',\n",
    "                                                        criterion='mse')\n",
    "}\n",
    "for i in range(len(list(models))):\n",
    "    model = list(models.values())[i]\n",
    "    model.fit(X_train, y_train) # Train model\n",
    "\n",
    "    # Make predictions\n",
    "    y_train_pred = model.predict(X_train)\n",
    "    y_test_pred = model.predict(X_test)\n",
    "\n",
    "    # Training set performance\n",
    "    model_train_accuracy = accuracy_score(y_train, y_train_pred) # Calculate Accuracy\n",
    "    model_train_f1 = f1_score(y_train, y_train_pred, average='weighted') # Calculate F1-score\n",
    "    model_train_precision = precision_score(y_train, y_train_pred) # Calculate Precision\n",
    "    model_train_recall = recall_score(y_train, y_train_pred) # Calculate Recall\n",
    "    model_train_rocauc_score = roc_auc_score(y_train, y_train_pred)\n",
    "\n",
    "\n",
    "    # Test set performance\n",
    "    model_test_accuracy = accuracy_score(y_test, y_test_pred) # Calculate Accuracy\n",
    "    model_test_f1 = f1_score(y_test, y_test_pred, average='weighted') # Calculate F1-score\n",
    "    model_test_precision = precision_score(y_test, y_test_pred) # Calculate Precision\n",
    "    model_test_recall = recall_score(y_test, y_test_pred) # Calculate Recall\n",
    "    model_test_rocauc_score = roc_auc_score(y_test, y_test_pred) #Calculate Roc\n",
    "\n",
    "\n",
    "    print(list(models.keys())[i])\n",
    "    \n",
    "    print('Model performance for Training set')\n",
    "    print(\"- Accuracy: {:.4f}\".format(model_train_accuracy))\n",
    "    print('- F1 score: {:.4f}'.format(model_train_f1))\n",
    "    \n",
    "    print('- Precision: {:.4f}'.format(model_train_precision))\n",
    "    print('- Recall: {:.4f}'.format(model_train_recall))\n",
    "    print('- Roc Auc Score: {:.4f}'.format(model_train_rocauc_score))\n",
    "\n",
    "    \n",
    "    \n",
    "    print('----------------------------------')\n",
    "    \n",
    "    print('Model performance for Test set')\n",
    "    print('- Accuracy: {:.4f}'.format(model_test_accuracy))\n",
    "    print('- F1 score: {:.4f}'.format(model_test_f1))\n",
    "    print('- Precision: {:.4f}'.format(model_test_precision))\n",
    "    print('- Recall: {:.4f}'.format(model_test_recall))\n",
    "    print('- Roc Auc Score: {:.4f}'.format(model_test_rocauc_score))\n",
    "\n",
    "    \n",
    "    print('='*35)\n",
    "    print('\\n')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYoAAAEWCAYAAAB42tAoAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8vihELAAAACXBIWXMAAAsTAAALEwEAmpwYAABDgUlEQVR4nO3deZyN5fvA8c9lH/suS4WMfRmR+IpQQoVWFRV9W75KSpGkTaVSqSgiP5X6pl220reyJWlDslMJTVTI3mCW6/fH/cw4xpkzjzFnzsyZ6/16zWvOedbrec7Mc53nvp/7vkVVMcYYYzJSINIBGGOMyd0sURhjjAnJEoUxxpiQLFEYY4wJyRKFMcaYkCxRGGOMCckShTkhIrJGRDpEOo7cQkSGi8jkCO17ioiMjMS+s5uI9BGRz7K4rv1NhpklijxMRDaLSIKIHBCRP7wLR8lw7lNVG6nqwnDuI5WIFBWRJ0Vkq3ecP4nIPSIiObH/IPF0EJH4wGmq+oSq3hSm/YmI3CEiq0XkoIjEi8j7ItIkHPvLKhEZISJvnsw2VHWqql7gY1/HJcec/JvMryxR5H3dVbUkEAc0B+6LbDgnTkQKZTDrfeA84EKgFHAdcAswNgwxiIjktv+HscCdwB1AeaAuMAO4KLt3FOIzCLtI7tv4pKr2k0d/gM3A+QHvnwY+DnjfGlgC7AF+BDoEzCsPvAZsA3YDMwLmXQys8NZbAjRNv0+gGpAAlA+Y1xzYCRT23v8bWOdt/1Pg9IBlFRgA/AT8GuTYzgMOAaemm342kAzU8d4vBJ4EvgP2AjPTxRTqHCwEHge+8o6lDnCDF/N+YBPwH2/ZEt4yKcAB76caMAJ401umpndcfYGt3rm4P2B/McDr3vlYBwwF4jP4bGO942wV4vOfAowHPvbi/RY4I2D+WOA3YB+wDGgXMG8E8AHwpjf/JqAV8LV3rrYD44AiAes0Aj4H/gb+BIYDXYEjQKJ3Tn70li0DvOJt53dgJFDQm9fPO+fPe9sa6U1b7M0Xb95f3me6EmiM+5KQ6O3vADA7/f8BUNCL6xfvnCwj3d+Q/WThWhPpAOznJD68Y/9BagCrgLHe++rALty38QJAZ+99JW/+x8C7QDmgMHCuN/1M7x/0bO+frq+3n6JB9jkfuDkgnmeAid7rS4CfgQZAIeABYEnAsupddMoDMUGObRTwRQbHvYWjF/CF3oWoMe5iPo2jF+7MzsFC3AW9kRdjYdy39TO8i9W5wD/Amd7yHUh3YSd4ovg/XFJoBhwGGgQek3fOa+AugBkliv7Alkw+/ym4C20rL/6pwDsB868FKnjzBgN/AMUC4k70PqcCXrwtcIm1kHcs64BB3vKlcBf9wUAx7/3Z6c9BwL5nAC97n0llXCJP/cz6AUnAQG9fMRybKLrgLvBlvc+hAVA14JhHhvg/uAf3f1DPW7cZUCHS/6t5/SfiAdjPSXx47h/kAO6bkwLzgLLevHuB/6Zb/lPchb8q7ptxuSDbnAA8lm7aBo4mksB/ypuA+d5rwX17be+9/wS4MWAbBXAX3dO99wp0CnFskwMveunmfYP3TR13sR8VMK8h7htnwVDnIGDdRzM5xzOAO73XHfCXKGoEzP8OuNp7vQnoEjDvpvTbC5h3P/BNJrFNASYHvL8QWB9i+d1As4C4F2Wy/UHAdO/1NcAPGSyXdg6891VwCTImYNo1wALvdT9ga7pt9ONoougEbMQlrQJBjjlUotgA9DzZ/y37OfYnt5XJmhN3iaqWwl3E6gMVvemnA1eKyJ7UH+AcXJI4FfhbVXcH2d7pwOB0652KK2ZJ7wOgjYhUA9rjLpJfBmxnbMA2/sYlk+oB6/8W4rh2erEGU9WbH2w7W3B3BhUJfQ6CxiAi3UTkGxH521v+Qo6eU7/+CHj9D5D6gEG1dPsLdfy7yPj4/ewLERksIutEZK93LGU49ljSH3tdEfnIezBiH/BEwPKn4opz/Dgd
9xlsDzjvL+PuLILuO5CqzscVe40H/hSRSSJS2ue+TyRO45Mliiihql/gvm2N9ib9hvs2XTbgp4SqjvLmlReRskE29RvweLr1iqvq20H2uQf4DOgF9AbeVu9rnbed/6TbToyqLgncRIhDmgucLSKnBk4UkVa4i8H8gMmBy5yGK1LZmck5OC4GESmKK7oaDVRR1bLAHFyCyyxeP7bjipyCxZ3ePKCGiLTMyo5EpB3ujqoX7s6xLK68P/CJsfTHMwFYD8SqamlcWX/q8r/hiuSCSb+d33B3FBUDzntpVW0UYp1jN6j6gqq2wBUL1sUVKWW6XiZxmiyyRBFdxgCdRSQOV0nZXUS6iEhBESnmPd5ZQ1W344qGXhKRciJSWETae9v4P6C/iJztPQlUQkQuEpFSGezzLeB64HLvdaqJwH0i0ghARMqIyJV+D0RV5+IultNEpJF3DK1x5fATVPWngMWvFZGGIlIceBT4QFWTQ52DDHZbBCgK7ACSRKQbEPjI5p9ABREp4/c40nkPd07KiUh14PaMFvSO7yXgbS/mIl78V4vIMB/7KoWrB9gBFBKRh4DMvpWXwlVsHxCR+sCtAfM+Ak4RkUHeY8ulRORsb96fQM3Up8a8v6/PgGdFpLSIFBCRM0TkXB9xIyJneX9/hYGDuIcakgP2VTvE6pOBx0Qk1vv7bSoiFfzs12TMEkUUUdUdwBvAg6r6G9AT961wB+6b1j0c/cyvw33zXo+rvB7kbWMpcDPu1n83rkK6X4jdzsI9ofOnqv4YEMt04CngHa8YYzXQ7QQP6XJgAfA/XF3Mm7gnaQamW+6/uLupP3AVrXd4MWR2Do6hqvu9dd/DHXtv7/hS568H3gY2eUUqwYrjQnkUiAd+xd0xfYD75p2ROzhaBLMHV6RyKTDbx74+xX0Z2IgrjjtE6KIugCG4Y96P+8LwbuoM79x0BrrjzvNPQEdv9vve710istx7fT0u8a7FncsP8FeUBi6h/Z+33hZcMVzqnfIrQEPv/M8Isu5zuM/vM1zSewVXWW5OghwtKTAm7xGRhbiK1Ii0jj4ZInIrrqLb1zdtYyLF7iiMySEiUlVE2npFMfVwj5pOj3RcxmTGWkQak3OK4J7+qYUrSnoHVw9hTK5mRU/GGGNCsqInY4wxIeW5oqeKFStqzZo1Ix2GMcbkKcuWLdupqpWysm6eSxQ1a9Zk6dKlkQ7DGGPyFBHZktV1rejJGGNMSJYojDHGhGSJwhhjTEiWKIwxxoRkicIYY0xIliiMMcaEFLZEISKvishfIrI6g/kiIi+IyM8islJEzgxXLMYYY7IunO0opuC6SH4jg/ndcN1Tx+LGZ57g/TZR6lBicuYLGWNynbAlClVdJCI1QyzSE3jDGxHtGxEpKyJVvUFPTJR57/vfGDptZaTDMCbfOXvrKm5cOvOkthHJltnVOXYglXhv2nGJQkRuAW4BOO2003IkuJP11rdbefzjtSc9dma0OJKUQsECwpAL6kU6FGPyhWJ7dtFm4lPU//RD9p2S0aCO/kQyUUiQaUGvq6o6CZgE0LJly4hde9/6disTvvjZ17K7DyaSrMp1rU8Pc1R5R2zlUvQ6K9Qw0caYbHP5UJg3C+67j9IPPAAlSmR5U5FMFPEcO7h8DWBbhGLJ1He//s3w6asoXqQgXRud4muduNPKcn2bmuENzBhjUq1ZA2XLQvXq8NRT8Oij0KjRSW82koliFnC7iLyDq8Tem1vrJ7btSaDXy18DMOj8WG5pf0aEIzLGmAAHD8Jjj8Gzz0KfPjBlCtSpk22bD1uiEJG3gQ5ARRGJBx4GCgOo6kRgDnAh8DPwD3BDuGI5WUu37Aag+Wllubld7QhHY4wxAT7+GAYMgC1b4N//dncS2SycTz1dk8l8BQaEa//ZYdueBN76divr/9gHwDNXNEMkWNWKMcZEwEsvuSTRsCEsWgTt2oVlN3luPIqcdP/0VSzYsIMiBQtQpXRRKpUsGumQjDH5XVIS7NgB
VatCr16QkAADB0KRImHbpSUKYP0f+1i7bd9x07/6ZRcA6x7rSsECdidhjImw776D//wHChWCb76BihVh8OCw7zbfJ4qUFKXb2C/RDB66/U/72pYkjDGRtWcPDB8OEye6O4mxY6FAznXVl+8TxeKfd6IK59SpyOOXNj5mniDUKBcTociMMQZYtQo6d3bFTXfc4R55LV06R0PIl4kiOUX5a/8hAJ79bAMAd19Ql9MrZL1BijHGZKvERChcGOrWhY4d4Z574MzI9J0a9YkiOUVJTjm2XOnhWWt4+7utae9jChek+allczgyY4wJ4vBh94jrm2/C8uVQsiS8/XZEQ4rqRPH7ngQ6P/cF/xw5vtfS6mVjGNjJNUiJO62sPfZqjIm8+fPh1lth40a46iqXNEqWjHRU0Z0oXlrwM/8cSaZVrfKcW7fSMfNanl6Os2tXiFBkxhgTICEBbrnF3UXUrg3/+x906RLpqNJEbaJITlGmfuuKl17rdxYlikbtoRpj8rpixWDnTnjgAfd0U0zueogmaodCPXA4CXDdbliSMMbkOitXuruG+HgQcV1xPPZYrksSEMWJItXFTatFOgRjjDnq4MGjTzAtXw4//eSm52C7iBOVeyMzxphoM2uW65dp9GjXgd+GDe7R11zOymSMMSanzJjhGsstXgxt20Y6Gt+iNlF8/cvOSIdgjMnvEhPhhRfcXcOZZ7quN4oVcw3p8pCoLHo6kpRC/zeXA1C9bLEIR2OMyZe++QZatoQhQ+C999y0UqXyXJKAKEwU+w8lMmnRLwBcEleNro2rRjgiY0y+sns39O8P//oX/P03TJ8OTz4Z6ahOStQVPT372UamLNkMwD1d60c2GGNM/jNpEkyeDHfdBSNGuLuIPC7qEsWOA4cpV7ww7/2nDdXL5r7nkY0xUWjDBte76znnwKBB0K0bNG0a6aiyTdQUPakqhxKT+XjldgBiq+T9LG6MyeUOHYKHH3ZJYcAAUIWiRaMqSUAU3VE8MGN1Wpcd58RWymRpY4w5SZ9/DrfdBj//DL17w7PPuhbWUShkohCRYsDFQDugGpAArAY+VtU14Q8vcykpymUTlrDq973UKBdD77NP48oWp0Y6LGNMNFu0CC64AGJjXcI4//xIRxRWGSYKERkBdAcWAt8CfwHFgLrAKC+JDFbVleEPM2PJqqz4bQ/NTyvLXefXpX1du5swxoRBcjKsXQtNmkC7dvDKK+5Oolj0P4If6o7ie1UdkcG850SkMnBa9od0Ylb/vheATvUqW5IwxoTHDz+4R17XrXN9M1Wp4rrgyCcyrMxW1Y8D34tIiXTz/1LVpeEKzK8HZqwG4MzTy0U4EmNM1Nm/H+6+2zWc27wZJkyAypUjHVWOy/SpJxH5l4isBdZ575uJyEthj8yHvQmJrNm2D4A2NgiRMSY77d0LjRrB88/DzTfD+vXQp0/UVliH4uepp+eBLsAsAFX9UUTahzUqHzbtOMDYea573hva1qRAgfz34RljwmDfPtdxX5kybtS5886DNm0iHVVE+WpHoaq/pZt0/CDUOWhvQiKXjP+KmSu2USamMP3PPSOS4RhjokFiIjz9NNSo4caJADfiXD5PEuDvjuI3EfkXoCJSBLgDrxgqUga/t4J9h5I4rXxxFg3N/X25G2Nyua++cpXVq1fDJZdAJXswJpCfO4r+wACgOhAPxAG3hTGmkA4eTmLuur8AmD3wnEiFYYyJFgMHuq439u6FmTNdJ36nWlusQH7uKOqpap/ACSLSFvgqPCGFpup+v3xdC8rE5L3ueo0xuYDq0UrpU05xXYE//DCULBnZuHIpP3cUL/qclqMqliwS6RCMMXnR+vVuIKGZM937+++HZ56xJBFCqJbZbYB/AZVE5O6AWaWBguEOzBhjslVCAjzxBDz1FJQo4d4bX0LdURQBSuKSSamAn33AFX42LiJdRWSDiPwsIsOCzC8jIrNF5EcRWSMiN5z4IRhjTCbmzXNdb4wcCVdf7boF
v/rqSEeVZ2R4R6GqXwBfiMgUVd1yohsWkYLAeKAzrhL8exGZpaprAxYbAKxV1e4iUgnYICJTVfXIie7PGGMyFB8PhQq5hNGpU6SjyXP8VGb/IyLPAI1wnQICoKqZne1WwM+quglARN4BegKBiUKBUiIiuLuXv4Ek/+EbY0wQyckwcSIUKeJaVV9/vbuDKFo00pHlSX4qs6cC64FawCPAZuB7H+tVBwIb6sV70wKNAxoA24BVwJ2qmpJ+QyJyi4gsFZGle/fu9bFrY0y+tXw5tG4Nt98On37qpolYkjgJfhJFBVV9BUhU1S9U9d9Aax/rBetTQ9O97wKswI11EQeME5HSx62kOklVW6pqyzJlyvjYtTEm39m3D+68E846C377Dd5+G95/P9JRRQU/iSLR+71dRC4SkeZADR/rxQOBrVZq4O4cAt0AfKjOz8CvQH0f2zbGmGP9+COMG+daWK9f74qa8mEHfuHgp45ipIiUAQbj2k+UBgb5WO97IFZEagG/A1cDvdMtsxU4D/hSRKoA9YBN/kI3xuR7v/4KCxa4sSHatXPDktaqFemook6miUJVP/Je7gU6QlrL7MzWSxKR24FPce0uXlXVNSLS35s/EXgMmCIiq3BFVfeq6s4sHYkxJv84csSNUf3oo26EuUsvhXLlLEmESagGdwWBXrgK6P+p6moRuRgYDsQAzTPbuKrOAeakmzYx4PU24IKshW6MyZe+/NIVL61dC5ddBmPHuiRhwibUHcUruDqG74AXRGQL0AYYpqozciA2Y4w51o4dcMEFbijS2bPh4osjHVG+ECpRtASaqmqKiBQDdgJ1VPWPnAktuJTjHpwyxkQ1VZg7Fzp3dt1/f/SRe/y1RInM1zXZItRTT0dS2zSo6iFgY6STBMDO/a7RdtFC1t2UMVFvzRo491x3F7FwoZt23nmWJHJYqDuK+iKy0nstwBneewFUVZuGPbogCniprVG145pbGGOixT//uH6ZnnnGDUs6eTK0j/gIzPlWqETRIMeiOEGNq5dG7PloY6KTqusG/LvvoG9flyxsxLmICtUp4Al3BGiMMVm2fTtUrgwFC8Lw4VCmDHToEOmoDP5aZhtjTPgkJ8MLL0C9evDSS25az56WJHIRSxTGmMhZuhRatXJ9NP3rX3DhhZGOyAThK1GISIyI1At3MMaYfOTpp12S2L4d3n0XPvkEzjgj0lGZIDJNFCLSHdfD6/+893EiMivMcRljopEqJHr9jLZqBQMGwLp10KuXdeCXi/m5oxiBG4RoD4CqrgBqhisgY0yU+uUX6NoVhnmjInfoAC++6CqtTa7mJ1EkqaqNFmSMyZrDh12biMaN4euvrXgpD/LTzfhqEekNFBSRWOAOYEl4wzLGRIVly+Daa934EFdeCWPGQLVqkY7KnCA/dxQDceNlHwbewnU3PiiMMRljokXJkq7uYc4ceO89SxJ5lJ87inqqej9wf7iDMcbkcSkp8Nprrohp8mTXNmL16qN975g8yc+n95yIrBeRx0SkUdgjMsbkTatXu/6YbroJfvoJDh500y1J5HmZfoKq2hHoAOwAJonIKhF5INyBGWPyiIMH4d57oXlzVxfx2muup1fr4TVq+Er1qvqHqr4A9Me1qXgonEEZY/KQQ4dccrj+etiwAfr1szYRUcZPg7sGIjJCRFYD43BPPNUIe2TGmNwrPh6GDnX9NFWo4O4kXnnFvTZRx88dxWvAbuACVT1XVSeo6l9hjssYkxslJcHzz0ODBjBuHKxY4aaXLx/RsEx4ZfrUk6q2zolAjDG53Lffwn/+Az/+6DrvGzcOatWKdFQmB2SYKETkPVXtJSKr4JiBqiM6wp0xJgJSUuCGG2DvXvjgA7jsMquHyEdC3VHc6f2+OCcCMcbkMqouKXTtCqVKwYcfQvXq7rXJVzKso1DV7d7L21R1S+APcFvOhGeMiYiffoIuXVyvrpMmuWn161uSyKf8VGZ3DjKtW3YHYozJBQ4fhkcfhSZN
XJ3EuHEwaFCkozIRFqqO4lbcnUNtEVkZMKsU8FW4AzPGRMCAAe4x16uvhueeg6pVIx2RyQVC1VG8BXwCPAkMC5i+X1X/DmtUIRxJSkE18+WMMT799ZerrD7lFNfC+sorXbGTMZ5QRU+qqpuBAcD+gB9EJGIPTR9OSmH/oaRI7d6Y6JGS4uof6tVzY1YDxMZakjDHyeyO4mJgGe7x2MBn4RSoHca4QurV0hqGG3NSVq6E/v1dL68dOsAjj0Q6IpOLZZgoVPVi73eua1FTpXSxSIdgTN71wQeuDqJcOXjjDTewkLWJMCH46euprYiU8F5fKyLPichp4Q/NGJOt9u1zvzt0cJXWGzbAdddZkjCZ8vN47ATgHxFpBgwFtgD/DWtUxpjss3Ur9OwJ553nOvGrWBHGjrX+mYxvfhJFkqoq0BMYq6pjcY/IZkpEuorIBhH5WUSGZbBMBxFZISJrROQL/6EbY0JKTITRo10HfnPnusZz9sigyQI/Q6HuF5H7gOuAdiJSECic2UrecuNxDfbige9FZJaqrg1YpizwEtBVVbeKSOUsHIMxJr0tW6BHD1dp3b07vPginH56pKMyeZSfO4qrgMPAv1X1D6A68IyP9VoBP6vqJlU9AryDuysJ1Bv4UFW3Alj35cacpNQ7hlNOgSpVYPp0mDnTkoQ5KX6GQv0DmAqUEZGLgUOq+oaPbVcHfgt4H+9NC1QXKCciC0VkmYhc7zNuY0wgVXjzTTjrLDhwAIoWhc8+g0suscpqc9L8PPXUC/gOuBLoBXwrIlf42Hawv870BaSFgBbARUAX4EERqRskhltEZKmILPWxX2Pylw0bXEX1dddBoUKwa1ekIzJRxk8dxf3AWanFQiJSCZgLfJDJevHAqQHvawDbgiyzU1UPAgdFZBHQDNgYuJCqTgImARStGmu1ccaAG23uscdg1CiIiYEJE+CWW6CAnxJlY/zz8xdVIF3dwS6f630PxIpILREpAlwNzEq3zExcBXkhESkOnA2s87FtY0zBgvDll3DFFe6uon9/SxImLPzcUfxPRD4F3vbeXwXMyWwlVU0SkduBT4GCwKuqukZE+nvzJ6rqOhH5H7ASSAEmq+rqrByIMfnCH3/A8OGuy41TT4U5c6CY9VRgwsvPmNn3iMhlwDm4eodJqjrdz8ZVdQ7pkoqqTkz3/hn8PUVlTP6VnOw68LvvPkhIgG7dXKKwJGFyQKjxKGKB0cAZwCpgiKr+nlOBGWM8P/zgipW++85VWr/0EtQ97pkPY8ImVIHmq8BHwOW4HmRfzJGIjDHHGjcONm+GqVPh888tSZgcF6roqZSq/p/3eoOILM+JgIzJ91RhxgyoWROaN3fdcIwe7Xp7NSYCQt1RFBOR5iJypoicCcSke2+MyW6bN7uuNy67DMaMcdPKlbMkYSIq1B3FduC5gPd/BLxXoFO4gjIm30lMdGNUP/KIe8R19Oijo84ZE2GhBi7qmJOBGJOvvfwyDBvmutwYOxZOsyFfTO7hpx2FMSYcdu1yRU0tWsDNN0OdOtC1a6SjMuY41ozTmJymCq+/DvXrw5VXuq44iha1JGFyLUsUxuSkdeugY0fo1w9iY93TTYXsxt7kbpn+hYqIAH2A2qr6qDde9imq+l3Yo8tA4YKW30we9OOPrhvwkiVdK+sbb7S+mUye4Oev9CWgDXCN934/buS6iKlcqmgkd2/MiYmPd7+bNnVPNa1f7+okLEmYPMLPX+rZqjoAOASgqruBImGNKhOVS1uiMHnAtm1w1VVuzOrff3cDCN13H1S2EX9N3uInUSR6418rpI1HkRLWqDJRqZR1hGZyseRk1+1GgwZuGNKhQ6FixUhHZUyW+alFewGYDlQWkceBK4AHwhpVCAKULmaVfyaXOnQI2reH77+Hzp1dB3516kQ6KmNOip9uxqeKyDLgPNx1+hJVjdjgQiKC2BjAJrdJTITChV233x07wt13u2In+1s1UUBUQ48s6j3ldBxV3RqWiDIR
U62uJmzbmPmCxuQEVZg2DQYPhunT4UzrBs3kTiKyTFVbZmVdP2U4H+PqJwQoBtQCNgCNsrJDY6LGpk1w++3wySeul1d7islEKT9FT00C33s9x/4nbBEZkxc89xzcf79rLDdmDAwYYA3nTNQ64b9sVV0uImeFIxhj8owDB+DCC10HfjVqRDoaY8LKT8vsuwPeFgDOBHaELSJjcqOdO+Gee+DSS914EQ88YEVNJt/wc0dRKuB1Eq7OYlp4wjEml0lJgSlTXJLYtw+aeCWxliRMPhIyUXgN7Uqq6j05FI8xucfatdC/P3z5JZxzDkycCI3sGQ6T/4RMFKqabMOemnxr6VJYswZeecX19mp3ESafyrAdhYgUUtUkEXkWiAXeBw6mzlfVD3MmxGNZOwoTVnPmuAGFrrvOtZHYvRvKl490VMactJNpRxHqK1JqN+LlgV24MbK7ez8XZ2VnxuRa8fFwxRVw0UWunyZV16rakoQxIYueBEBVb8ihWIzJeUlJMH68e4opKQkefxyGDLGuN4wJECpRVEr3aOwxVPW5MMRjTM5atgwGDXLDkI4fD7VrRzoiY3KdUImiIFAS787CmKixdy/MmweXXQZnnw3ffutGnrO7CGOCCpUotqvqozkWiTHhpgrvvefuIHbtgs2boVo1aNUq0pEZk6uFqsy2r1cmevzyC3TrBldfDdWrw5IlLkkYYzIV6o6iR2Yri0hJVT2QjfEYk/3274cWLVwr6xdegNtug4IFIx2VMXlGqDuKKSLyrIi0F5ESqRNFpLaI3CginwJdwx+iMVm0cqX7XaqUazS3bh0MHGhJwpgTlGGiUNXzgHm4LsXXiMheEdkFvAmcAvRV1Q9yJkxjTsCOHdC3LzRr5hrQAVx+uStyMsacsMy68JgDzMnqxkWkKzAW9wTVZFUdlcFyZwHfAFdZ8jFZlpICr74KQ4e6bsCHD4cOHSIdlTF5Xqad14jIByJyoYicUEc3XoeC44FuQEPgGhFpmMFyTwGfnsj2jTnO5ZfDzTe7Hl5XrHCN54oXj3RUxuR5fi7+E4E+wE8iMkpE6vvcdivgZ1XdpKpHgHeAnkGWG4jrtvwvn9s15qiDB12LaoBrrnFdgi9cCA2P+05ijMmiTBOFqs5V1T64AYs2A5+LyBIRuUFECodYtTrwW8D7eG9aGhGpDlyKS0YZEpFbRGSpiCzNqBNDkw/Nnu0Swksvufe9erm6CWs4Z0y28lWcJCIVgH7ATcAPuHqHM4HPQ60WZFr6q/wY4F5VTQ61f1WdpKotVbWl2EXA/Paba1Xdo4d7oqlFi0hHZExU8zMU6odAfeC/QHdV3e7NeldEloZYNR44NeB9DWBbumVaAu94F/+KwIUikqSqM/yFb/KdN990gwmlpMCoUXDXXVCkSKSjMiaq+RkKdbL39FMaESmqqocz6dv8eyBWRGoBvwNXA70DF1DVWgHbnAJ8ZEnCBJXa7XeNGu5JphdfhFq1Ml3NGHPy/BQ9jQwy7evMVlLVJOB23NNM64D3VHWNiPQXkf4nFqbJt/bsgVtvdWNWg0sSH31kScKYHJThHYWInIKrfI4RkeYcrXMoDfh65jBYOwxVDVpxrar9/GzT5BOq8PbbcPfdrgHdXXcdvaswxuSoUEVPXXAV2DWAwLEn9gPDwxiTye9+/RVuuQXmznXdf3/yCTRvHumojMm3MkwUqvo68LqIXK6q03IwJpPfJSa6fprGj4f//Mf6ZjImwkIVPV2rqm8CNYONdGcj3JlsNW8efPwxPPcc1K0LW7ZAsWKRjsoYQ+jK7NQeY0sCpYL8GHPy/vwTrr0Wzj8fZs1yAwqBJQljcpFQRU8vey9fUtUdORSPyS9SUuD//g+GDXPdcDz4INx3H8TERDoyY0w6ftpRLBGRX4F3gQ9VdXeYYzL5wd698MADEBcHEyZAfb9diBljcpqfvp5igQeARsAyEflIRK4Ne2Qm+hw44OogkpOhXDn49luYP9+ShDG5nK++nlT1
O1W9G9cj7N/A62GNykSfmTNdB36DB8MXX7hptWtbuwhj8gA/41GUFpG+IvIJsATYjksYxmRuyxbo2RMuuQTKloWvvoJOnSIdlTHmBPipo/gRmAE8qqqZdt1hTBpVuOIKWLsWnn4aBg2CwqF6pjfG5EZ+EkVttUEgzIn45hto1Mh1AT5pEpQvD6efHumojDFZlGHRk4iM8V7OEpHjfnImPJOn/P23a0ndpg2MHu2mNW9uScKYPC7UHcV/vd+jcyIQk4epunEiBg92yWLw4KO9vRpj8rxQDe6WeS/jVHVs4DwRuRP4IpyBmTxk+HA3iFDr1vD559CsWaQjMsZkIz+Px/YNMq1fNsdh8ppDh2DnTvf6hhtco7mvvrIkYUwUCtUp4DW4EelqpauTKAXsCndgJhf7/HO47TZo3BimT3ed+NWtG+mojDFhEqqOIrXNREXg2YDp+4GV4QzK5FJ//OEGEnr7bYiNhdtvj3RExpgcEKqOYguwBWiTc+GYXGvBArj0UkhIgBEj4N57rYdXY/KJUEVPi1X1HBHZDwS2oxBAVbV02KMzkZeY6BrJNW0KnTvD449bMZMx+YzktbZ0MdXqasK2jZEOI/rt3w8PPQRff+0qqW2UOWPyNBFZpqots7Kun76ezhCRot7rDiJyh4iUzcrOTB6gCh9+CA0awNixrsHc4cORjsoYE0F+Ho+dBiSLSB3gFaAW8FZYozKRsXMndO8Ol18OFSvCkiXusdfixSMdmTEmgvwkihRVTQIuBcao6l1A1fCGZSKiVCk3NOlzz8HSpa4BnTEm3/OTKBK9NhV9gY+8adYFaLRYvBi6dXODChUt6gYTuusuKOSnv0hjTH7gJ1HcgHtE9nFV/VVEagFvhjcsE3a7dsFNN0G7dq4b8E2b3PQCvsayMsbkI/bUU36jCq+/DkOGwJ49rgHdww9DiRKRjswYE0Yn89RTpuULItIWGAGc7i2f2o6idlZ2aHKBN96AevVg4kRo0iTS0Rhjcjk/BdGvAHcBy4Dk8IZjwiIhwfXuevPNUKMGTJsGZcpYMZMxxhc/iWKvqn4S9khMeHz6qevAb9MmqFwZBgyAcuUiHZUxJg/xkygWiMgzwIdAWssrVV0etqjMydu2zT299N57rphp/nzo2DHSURlj8iA/ieJs73dgJYgCnbI/HJNtRo6EmTPh0Udh6FD36KsxxmSBPfUUTZYtO9qB365dsHs31KkT6aiMMblAuPt6qiIir4jIJ977hiJyo8/AuorIBhH5WUSGBZnfR0RWej9LRMSGR8uKffvgjjugVSs3LClAhQqWJIwx2cLPYy9TgE+Bat77jcCgzFYSkYLAeKAb0BC4RkQaplvsV+BcVW0KPAZM8hW1cVTh/fehfn0YNw5uvRXetLaQxpjs5SdRVFTV94AUAK/fJz+PybYCflbVTap6BHgH6Bm4gKouUdXd3ttvgBq+Izfw1lvQqxeccorremPcOChbNtJRGWOijJ/K7IMiUgFv8CIRaQ3s9bFedeC3gPfxHK0YD+ZGIOhjuCJyC3ALQNFT8nlxypEj7lHX+vXhiitcG4l+/axvJmNM2Pi5utwNzALOEJGvgErAFT7WkyDTgtaci0hHXKI4J9h8VZ2EVywVU61u3qp9z06LFkH//q4Dv40b3VCkN90U6aiMMVEu00ShqstF5FygHu7iv0FVE31sOx44NeB9DWBb+oVEpCkwGeimqrt8RZ3f7NwJ99wDU6ZAzZqu6w0br9oYk0NCjZl9FvCbqv6hqkki0gK4HNgiIiNU9e9Mtv09EOv1Nvs7cDXQO90+TsM15LtOVe2Z12A2bYKzznJPNg0bBg8+aAMJGWNyVKjK7JeBIwAi0h4YBbyBq5/I9Okkr9L7dtwTU+uA91R1jYj0F5H+3mIPARWAl0RkhYgszfKRRJt9+9zvWrXghhvghx/gySctSRhjclyGDe5E5EdVbea9Hg/sUNUR3vsVqhqXU0EGivoGd//8A489BpMmwY8/
uk78jDHmJIWrwV1BEUktmjoPmB8wzx6xCYePP4ZGjVxPrz17QkxMpCMyxpiQF/y3gS9EZCeQAHwJICJ18Pd4rPErKQmuuQY++AAaNIAvvoD27SMdlTHGACEShao+LiLzgKrAZ3q0jKoAMDAngot6qiDi2kBUqQJPPAGDB0ORIpGOzBhj0lingJHy/fdubIiJE+HMMyMdjTEmyoW1U0CTzfbuhdtvh7PPhvh418urMcbkYpYoclJqB34TJrhksX49dO4c6aiMMSYke3opJ61bB9Wrw+zZ0DJLd4DGGJPjrI4inA4fhmeegWbNoHt3SEyEAgWgYMFIR2aMyWesjiI3WrDAJYgHH4R589y0woUtSRhj8hxLFNntr7+gb1/o1MndQXzyCYwZE+mojDEmyyxRZLfPPoO334b774fVq6Fr10hHZIwxJ8Uqs7PDqlWwYYMbSKhPH/jXv6B27UhHZYwx2cLuKE7GwYMwdCg0b+5+Jya6ltaWJIwxUcTuKLJq9mzXFmLrVrjxRnjqKVdZnYskJiYSHx/PoUOHIh2KMSaHFCtWjBo1alA4G69HliiyYvVq6NHD9fT65ZdwTtARXCMuPj6eUqVKUbNmTUSCjUxrjIkmqsquXbuIj4+nVq1a2bZdK3ryKykJFi50rxs3ho8+coMJ5dIkAXDo0CEqVKhgScKYfEJEqFChQraXIlii8OPbb11L6vPOg59+ctMuuijXFTUFY0nCmPwlHP/zlihC2b0bbr0V2rSBnTtdX0116kQ6KmOMyVGWKDJy+LB7mmnSJBg0yPXTdNll7qkm49uff/5J7969qV27Ni1atKBNmzZMnz79pLY5YsQIRo8eDcBDDz3E3Llzs7SdFStWMGfOnKDzFi5cSJkyZYiLi6Np06acf/75/PXXX1mOOb3Nmzfz1ltvZTh/48aNXHjhhdSpU4cGDRrQq1cv/vzzTxYuXMjFF1+cbXHcdNNNrF27FoD333+fBg0a0LFjR5YuXcodd9xxUtsuWLAgcXFxNG7cmO7du7Nnz560eWvWrKFTp07UrVuX2NhYHnvsMQK7E/rkk09o2bIlDRo0oH79+gwZMiToPmbMmMGjjz56UnGG099//03nzp2JjY2lc+fO7N69O+hyY8eOpXHjxjRq1IgxAQ10M1p/1apV9OvXLweOwKOqeeqnWNVYDav4+KOvX3tNdfny8O4vjNauXRvR/aekpGjr1q11woQJadM2b96sL7zwwnHLJiYm+t7uww8/rM8888xJx/faa6/pgAEDgs5bsGCBXnTRRWnvhw0bpg899NBJ7zOj7QdKSEjQOnXq6KxZs9KmzZ8/X1etWhVyvZPVpUsXnT9/fpbWDfb5lShRIu319ddfryNHjlRV1X/++Udr166tn376qaqqHjx4ULt27arjxo1TVdVVq1Zp7dq1dd26dWnbHj9+fND9tmnTRnfs2HFScYbTPffco08++aSqqj755JM6dOjQ45ZZtWqVNmrUSA8ePKiJiYl63nnn6caNGzNd/7zzztMtW7YE3W+w/31gqWbxumtPPaU6dMg94vrEE/Dee27M6pzM2GH2yOw1rN22L1u32bBaaR7u3ijD+fPnz6dIkSL0798/bdrpp5/OwIFugMQpU6bw8ccfc+jQIQ4ePMisWbPo2bMnu3fvJjExkZEjR9KzZ08AHn/8cd544w1OPfVUKlWqRIsWLQDo168fF198MVdccQXLli3j7rvv5sCBA1SsWJEpU6ZQtWpVOnTowNlnn82CBQvYs2cPr7zyCmeffTYPPfQQCQkJLF68mPvuu4+rrroq6HGoKvv376eOV+z4999/8+9//5tNmzZRvHhxJk2aRNOmTTOc/sUXX3DnnXcCrvx40aJFDBs2jHXr1hEXF0ffvn2566670vb31ltv0aZNG7p37542rWPHjoC700n13XffMWjQIBISEoiJieG1116jXr16rFmzhhtuuIEjR46QkpLCtGnTqFatGr169SI+Pp7k5GQe
fPBBrrrqKjp06MDo0aOZM2cOixcv5tdff6VHjx5cdNFFjB49mo8++oiDBw8ycOBAVq1aRVJSEiNGjKBnz57HfX7z58/P8G+hTZs2rFy5Mu342rZtywUXXABA8eLFGTduHB06dGDAgAE8/fTT3H///dSvXx+AQoUKcdtttx23zY0bN1K0aFEqVqwIwOzZsxk5ciRHjhyhQoUKTJ06lSpVqjBixAi2bdvG5s2bqVixImPHjqV///5s3boVgDFjxtC2bdsMz+fJmDlzZtpn1rdvXzp06MBTTz11zDLr1q2jdevWFC9eHIBzzz2X6dOnM3To0JDrd+/enXfeeYehQ4eeVIx+WKIA12nfrbe6iuprrnGDCpmTtmbNGs7MZPS+r7/+mpUrV1K+fHmSkpKYPn06pUuXZufOnbRu3ZoePXqwfPly3nnnHX744QeSkpI488wz0xJFqsTERAYOHMjMmTOpVKkS7777Lvfffz+vvvoqAElJSXz33XfMmTOHRx55hLlz5/Loo4+ydOlSxo0bFzS2L7/8kri4OHbt2kWJEiV44oknAHj44Ydp3rw5M2bMYP78+Vx//fWsWLEiw+mjR49m/PjxtG3blgMHDlCsWDFGjRqVdiFOb/Xq1ccdXzD169dn0aJFFCpUiLlz5zJ8+HCmTZvGxIkTufPOO+nTpw9HjhwhOTmZOXPmUK1aNT7++GMA9u49dtj7hx56iPnz5zN69Ghatmx5TEJ6/PHH6dSpE6+++ip79uyhVatWnH/++cd9fhlJTk5m3rx53HjjjYD7u0h/fGeccQYHDhxg3759rF69msGDB2d6/F999dUxf1/nnHMO33zzDSLC5MmTefrpp3n22WcBWLZsGYsXLyYmJobevXtz1113cc4557B161a6dOnCunXrMjyfgfbv30+7du2CxvPWW2/RsGHDY6b9+eefVK1aFYCqVasGLb5s3Lgx999/P7t27SImJoY5c+bQ0huGINT6LVu2ZNSoUZYocsSgQTB2rKuk/uyzqB1IKNQ3/5wyYMAAFi9eTJEiRfj+++8B6Ny5c9pFRlUZPnw4ixYtokCBAvz+++/8+eeffPnll1x66aVp37h69Ohx3LY3bNjA6tWr6ex9fsnJyWn/YACXXXYZAC1atGDz5s2+4m3Xrl3ahfypp55i6NChTJw4kcWLF6ddQDp16sSuXbvYu3dvhtPbtm3L3XffTZ8+fbjsssuoUaPGiZ66oPbu3Uvfvn356aefEBESExMB9+398ccfJz4+nssuu4zY2FiaNGnCkCFDuPfee7n44oszvNgF89lnnzFr1qy0eqFDhw6lfRsP/PzSS0hIIC4ujs2bN9OiRYu0z0ZVM3wy50Se2Nm+fTuVKlVKex8fH89VV13F9u3bOXLkyDHtCHr06EFMTAwAc+fOTauXAdi3bx/79+/P8HwGKlWqFCtWrPAdox8NGjTg3nvvpXPnzpQsWZJmzZpRqFDml+bKlSuzbdu2bI0lI/mzMjslBZKT3etWreChh1x/TVGaJCKlUaNGLF++PO39+PHjmTdvHjt27EibVqJEibTXU6dOZceOHSxbtowVK1ZQpUqVtOfBM7uAqCqNGjVixYoVrFixglWrVvHZZ5+lzS9atCjgKliTkpJO+Fh69OjBokWL0vaVnohkOH3YsGFMnjyZhIQEWrduzfr160Puq1GjRixbtizTmB588EE6duzI6tWrmT17dtq56t27N7NmzSImJoYuXbowf/586taty7Jly2jSpAn33XffCVUAqyrTpk1LO7dbt26lQYMGwLGfX3oxMTGsWLGCLVu2cOTIEcaPH592fEuXLj1m2U2bNlGyZElKlSrl+/hjYmKOaS8wcOBAbr/9dlatWsXLL798zLzAOFNSUvj666/Tjuf333+nVKlSGZ7PQPv37ycuLi7oT2DySVWlShW2b98OuMRWuXLloMdy4403snz5chYtWkT58uWJjY3NdP1Dhw6lJb9wy3+J4scfXad93h8t
vXvDI49AsWKRjSsKderUiUOHDjFhwoS0af/880+Gy+/du5fKlStTuHBhFixYwJYtWwBo374906dPJyEhgf379zN79uzj1q1Xrx47duzg66+/BlxR1Jo1a0LGV6pUKfbv3+/rWBYvXswZZ5yRFs/UqVMBV2dQsWJFSpcuneH0X375hSZNmnDvvffSsmVL1q9fH3LfvXv3ZsmSJWnFRAD/+9//WLVq1THL7d27l+rVqwOuvifVpk2bqF27NnfccQc9evRg5cqVbNu2jeLFi3PttdcyZMiQYxJ4Zrp06cKLL76Ylgh/+OEH3+sClClThhdeeIHRo0eTmJhInz59WLx4cdrTagkJCdxxxx1pRSj33HMPTzzxBBs3ugHKUlJSeO65547bboMGDfj555+Dno/XX389w3guuOCCY4obU+8QMjqfgVLvKIL9pC92AvcFIzWW119/Pa3OLb3UIqWtW7fy4Ycfcs0112S6/saNG2ncuHGGx5md8k+iOHAABg+GFi1g0yY45ZRIRxT1RIQZM2bwxRdfUKtWLVq1akXfvn2Pq8xL1adPH5YuXUrLli2ZOnVqWmXmmWeeyVVXXUVcXByXX3550GKTIkWK8MEHH3DvvffSrFkz4uLiWLJkScj4OnbsyNq1a4mLi+Pdd989bn5qHUWzZs3473//m1bePWLECJYuXUrTpk0ZNmxY2j9yRtPHjBlD48aNadasGTExMXTr1o2mTZtSqFAhmjVrxvPPP3/MfmNiYvjoo4948cUXiY2NpWHDhkyZMuW4b6NDhw7lvvvuo23btiSn3iED7777Lo0bNyYuLo7169dz/fXXs2rVKlq1akVcXByPP/44DzzwQMhzE+jBBx8kMTGRpk2b0rhxYx588EHf66Zq3rw5zZo145133iEmJoaZM2cycuRI6tWrR5MmTTjrrLO4/fbbAWjatCljxozhmmuuoUGDBjRu3DjtW3Wg9u3b88MPP6QlsBEjRnDllVfSrl27tAruYF544YW0z6lhw4ZMnDgRyPh8noxhw4bx+eefExsby+eff86wYcMA2LZtGxdeeGHacpdffjkNGzake/fujB8/nnLlyoVcH2DBggVcdNFF2RJnZvLHUKhz58INN0B8PNxyC4waBd4HEc3WrVuXVkRgTDS688476d69e1rlen5x+PBhzj33XBYvXhy0PiPY/74NhZqZIkWgfHn46it4+eV8kSSMyQ+GDx8esjgzWm3dupVRo0b5qvTODtH51FNioht+dO9eGDkS2rd3HfgVyB950Zj8okqVKkGfgot2sbGxaRXeOSH6rpxLlrh6iKFDXbcbKSluej5NEnmtaNEYc3LC8T8fPVfPv/929Q9t28KePTBjBkyblm8TBLgBTHbt2mXJwph8Qr3xKIpl81Oc0VP0tGsXvPUWDBkCDz8MJUtGOqKIq1GjBvHx8ce0WzDGRLfUEe6yU95OFBs2wLvvugZzsbGwZQtUqBDpqHKNwoULZ+soV8aY/Cms5TIi0lVENojIzyIyLMh8EZEXvPkrRSR0x0CpEhJccmjaFJ5/Hn77zU23JGGMMdkubIlCRAoC44FuQEPgGhFJ33SxGxDr/dwCTCATJQ8fhCZN4LHH4MorYf16OPXUbI7eGGNMqnDeUbQCflbVTap6BHgHSN9+vSfwhtdd+jdAWRGpmn5Dgarv+dNVUM+dC2++CVWqhCd6Y4wxQHjrKKoDvwW8jwfS998dbJnqwDHt9UXkFtwdB8Bh+emn1eSzlpgZqAjsjHQQuYSdi6PsXBxl5+KoLA+uEc5EEay7z/TPafpZBlWdBEwCEJGlWW2GHm3sXBxl5+IoOxdH2bk4SkSWZr5UcOEseooHAisPagDpO0/3s4wxxpgICmei+B6IFZFaIlIEuBqYlW6ZWcD13tNPrYG9qnp8N5HGGGMiJmxFT6qaJCK3A58CBYFXVXWNiPT35k8E5gAXAj8D/wA3+Nj0pDCFnBfZuTjKzsVRdi6OsnNxVJbPRZ7r
ZtwYY0zOyr8dIRljjPHFEoUxxpiQcm2iCFv3H3mQj3PRxzsHK0VkiYg0i0ScOSGzcxGw3FkikiwiV+RkfDnJz7kQkQ4iskJE1ojIFzkdY07x8T9SRkRmi8iP3rnwUx+a54jIqyLyl4iszmB+1q6bqprrfnCV378AtYEiwI9Aw3TLXAh8gmuL0Rr4NtJxR/Bc/Aso573ulp/PRcBy83EPS1wR6bgj+HdRFlgLnOa9rxzpuCN4LoYDT3mvKwF/A0UiHXsYzkV74ExgdQbzs3TdzK13FGHp/iOPyvRcqOoSVd3tvf0G1x4lGvn5uwAYCEwD/srJ4HKYn3PRG/hQVbcCqGq0ng8/50KBUiIiQElcokjK2TDDT1UX4Y4tI1m6bubWRJFR1x4nukw0ONHjvBH3jSEaZXouRKQ6cCkwMQfjigQ/fxd1gXIislBElonI9TkWXc7ycy7GAQ1wDXpXAXeqakrOhJerZOm6mVvHo8i27j+igO/jFJGOuERxTlgjihw/52IMcK+qJrsvj1HLz7koBLQAzgNigK9F5BtV3Rju4HKYn3PRBVgBdALOAD4XkS9VdV+YY8ttsnTdzK2Jwrr/OMrXcYpIU2Ay0E1Vd+VQbDnNz7loCbzjJYmKwIUikqSqM3Ikwpzj939kp6oeBA6KyCKgGRBticLPubgBGKWuoP5nEfkVqA98lzMh5hpZum7m1qIn6/7jqEzPhYicBnwIXBeF3xYDZXouVLWWqtZU1ZrAB8BtUZgkwN//yEygnYgUEpHiuN6b1+VwnDnBz7nYiruzQkSq4HpS3ZSjUeYOWbpu5so7Cg1f9x95js9z8RBQAXjJ+yadpFHYY6bPc5Ev+DkXqrpORP4HrARSgMmqGvSxybzM59/FY8AUEVmFK365V1WjrvtxEXkb6ABUFJF44GGgMJzcddO68DDGGBNSbi16MsYYk0tYojDGGBOSJQpjjDEhWaIwxhgTkiUKY4wxIVmiMMfIrPfJgOXu93rhXOn1Tnp2NscxR0TKeq/vEJF1IjJVRHqE6jXWW36J97umiPT2ub9LROQh7/UIEfndO64VIjIqxHojRGSI7wMLvo2aIpLg7WutiEwUkRP63xSRliLygve6g4j8K2Be/+zoviPdeVkrItf4WGeQ14Yjs+XeEZHYk43RhIc9HmuOISLtgQO4jsMaZ7BMG+A5oIOqHhaRirieOMPSMl5E1uNanP96gut1AIao6sU+ll0C9FDVnSIyAjigqqN9rOd72RDbqAl8pKqNRaQQrufbMar6YRa3d9IxZbZd76K+DKigqokh1tkMtMyszYKInAtcq6o3Z2PIJpvYHYU5ho/eJwGq4rqGOOytszM1SYjIZhF5SkS+837qeNMricg0Efne+2nrTS8pIq+JyCrv7uTygO1UFJGJuO6jZ4nIXSLST0TGectUEZHp4sYY+DH1W7SIHPDiHIVrmbzCW/dLEYlLPQgR+UpEmopIXeBwqIuZiNzsxf2jdxzHfUv27nzWesfxjjethHeX9r2I/CAiwXq7DTz/ScASoI6InC4i87ztzRPXAh8RuVJEVnuxLPKmdRCRj7yk0x+4yzvudql3PSLSQETSuqzw7mRWeq9biMgX4joP/FQy6VFUVX/CNdgq560/QUSWirvLfCT1fADVgAUissCbdoGIfC0iy0XkfREp6W3yS+B8L1Ga3Can+0u3n9z/A9Qkg/7svfklcR2sbQReAs4NmLcZuN97fT3umzLAW8A53uvTgHXe66dw355T1y8XsJ2KQV73A8Z5r98FBnmvCwJlvNcHvN8dUvfvve+bui9cz6pLvdc3AM8GLDcC+N07xhW4DuUqBMwfCQwMWHaI93obUNR7Xdb7/QTumzK48SE2AiUyOt9AcVyXFN2A2UBfb/q/gRne61VA9XT7STvWwJiCxLgCqO29vhd4ANdydwlQyZt+Fa51c/rPPXA7ZwJfBswrH/A5LASaBvnsKgKLUo/f2/9DAdv4HGgR6b9/+zn+
x+4ozAlT1QO4XklvAXYA74pIv4BF3g743cZ7fT4wTkRW4PqbKS0ipbzp4wO2vRv/OgETvPWSVXVvJsu/D1wsIoVxF94p3vSq3nEEel5V47yfT4HG3h3JKqAP0CjI9lcCU0XkWo6OdXABMMw77oVAMVyiTO8Mb5mvgI9V9RPcuXvLm/9fjvYK/BWuO4qbcRfmE/Ee0Mt7fRUu2dYDGuN6VF2BSx4ZjWlyl4hsAL7FJY5UvURkOfAD7tw0DLJua2/6V95++gKnB8z/C3cHYnIZu80zmRKRU3HfbgEmqutHKBl34VvoXTz7cvTCG1jxlfq6ANBGVRPSbVvIoe7hVfUfEfkcN3hLL1xPswAJQJlMVp8CXKKqP3pJsUOQZS7CjTDWA3hQRBrh+hW6XFU3ZLL9X1Q1LrND8I6jv7iHBy4CVgQWp/nwLvC+iHzoNqU/iUgTYI2qtslkXXAJdLSIXAa8ISJn4BLtEOAsVd0tIlNwCTE9AT5X1YwqwYvhPguTy9gdhcmUqv4W8O16oojUk2OfUIkDtgS8vyrg99fe68+A21MXCLi4pZ9e7gRCmwfc6q1XUERKp5u/HyiVbtpk4AXge1VNrYtZB9TJZF+lgO3e3Uif9DPFPaV0qqouAIbiiplK4jqqG+glRESkub9DA1xx0NXe6z7AYm8bZ6jqt6r6ELCTY7uNhuDHDYCq/gIkAw/ikgbABqCSuIcUEJHCXpLLkLqK9qW4LwilgYPAXnE9s3bLIJZvgLZytN6quFc/lKousCbUfk1kWKIwxxDX++TXQD0RiReRG4MsVhJ4PbXiFlecMCJgflER+Ra4E7jLm3YH0NKrmF2Lq3AFV95fLrVyFuh4AuHeCXT07miWcXxx0Eogyav0vQtAVZcB+4DXApZbBDRPvZhn4EFcccvnwPog8wsCb3qx/ID75r0H12tpYWCluEeOHzuB47sDuME7x9d5xwvwjLjK/9Ve7D+mW282cGlqZXaQ7b4LXIsrhkLd8KFXAE95n8EK3DjsmXkUuBtXZ/ID7iL/Kq5oLNUk4BMRWaCqO3B1TG97x/QNbkyI1K6/EzQ6hwrI8+zxWJOtxOfjkJEiItVwRWb1NWAoTBEZC8xW1bmRii0/8xL5PlV9JdKxmOPZHYXJN8Q1OvsW91RW+vGSn8A9cWQiYw/weqSDMMHZHYUxxpiQ7I7CGGNMSJYojDHGhGSJwhhjTEiWKIwxxoRkicIYY0xI/w9UR3NMHwwzpQAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "## Plot ROC AUC Curve\n",
    "from sklearn.metrics import roc_auc_score,roc_curve\n",
    "plt.figure()\n",
    "\n",
    "# Add the models to the list that you want to view on the ROC plot\n",
    "auc_models = [\n",
    "{\n",
    "    'label': 'Gradient Boost Classifier',\n",
    "    'model':GradientBoostingClassifier(n_estimators=500,\n",
    "                                                        min_samples_split=20,\n",
    "                                                        max_depth=15,\n",
    "                                                        loss='exponential',\n",
    "                                                        criterion='mse'),\n",
    "    'auc':  0.9026\n",
    "},\n",
    "    \n",
    "]\n",
    "# create loop through all model\n",
    "for algo in auc_models:\n",
    "    model = algo['model'] # select the model\n",
    "    model.fit(X_train, y_train) # train the model\n",
    "# Compute False postive rate, and True positive rate\n",
    "    fpr, tpr, thresholds = roc_curve(y_test, model.predict_proba(X_test)[:,1])\n",
    "# Calculate Area under the curve to display on the plot\n",
    "    plt.plot(fpr, tpr, label='%s ROC (area = %0.2f)' % (algo['label'], algo['auc']))\n",
    "# Custom settings for the plot \n",
    "plt.plot([0, 1], [0, 1],'r--')\n",
    "plt.xlim([0.0, 1.0])\n",
    "plt.ylim([0.0, 1.05])\n",
    "plt.xlabel('1-Specificity(False Positive Rate)')\n",
    "plt.ylabel('Sensitivity(True Positive Rate)')\n",
    "plt.title('Receiver Operating Characteristic')\n",
    "plt.legend(loc=\"lower right\")\n",
    "plt.savefig(\"auc.png\")\n",
    "plt.show() "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
